aboutsummaryrefslogtreecommitdiff
path: root/src/lzss
diff options
context:
space:
mode:
authorpacien2018-11-30 18:44:20 +0100
committerpacien2018-11-30 18:44:20 +0100
commit5bbe75659aef55542268cbf35c66342cb22ce865 (patch)
tree53c1e79c4195be546ba5762d61eb995a4f9e0530 /src/lzss
parente88f60b63cb05f56a61060a953c726b7a78c0652 (diff)
downloadgziplike-5bbe75659aef55542268cbf35c66342cb22ce865.tar.gz
isolate lzss chain module
Diffstat (limited to 'src/lzss')
-rw-r--r--src/lzss/listpolyfill.nim42
-rw-r--r--src/lzss/lzsschain.nim47
-rw-r--r--src/lzss/lzssencoder.nim58
-rw-r--r--src/lzss/lzssnode.nim39
-rw-r--r--src/lzss/matchtable.nim32
5 files changed, 218 insertions, 0 deletions
diff --git a/src/lzss/listpolyfill.nim b/src/lzss/listpolyfill.nim
new file mode 100644
index 0000000..00b30ee
--- /dev/null
+++ b/src/lzss/listpolyfill.nim
@@ -0,0 +1,42 @@
1# gzip-like LZSS compressor
2# Copyright (C) 2018 Pacien TRAN-GIRARD
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU Affero General Public License as
6# published by the Free Software Foundation, either version 3 of the
7# License, or (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Affero General Public License for more details.
13#
14# You should have received a copy of the GNU Affero General Public License
15# along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17import lists
18
19# https://github.com/nim-lang/Nim/pull/9805
20
21proc prepend*[T](L: var SinglyLinkedList[T], n: SinglyLinkedNode[T]) =
22 ## prepends a node to `L`. Efficiency: O(1).
23 n.next = L.head
24 L.head = n
25 if L.tail == nil: L.tail = n
26
27proc prepend*[T](L: var SinglyLinkedList[T], value: T) =
28 ## prepends a node to `L`. Efficiency: O(1).
29 listpolyfill.prepend(L, newSinglyLinkedNode(value))
30
31proc append*[T](L: var SinglyLinkedList[T], n: SinglyLinkedNode[T]) =
32 ## appends a node `n` to `L`. Efficiency: O(1).
33 n.next = nil
34 if L.tail != nil:
35 assert(L.tail.next == nil)
36 L.tail.next = n
37 L.tail = n
38 if L.head == nil: L.head = n
39
40proc append*[T](L: var SinglyLinkedList[T], value: T) =
41 ## appends a value to `L`. Efficiency: O(1).
42 append(L, newSinglyLinkedNode(value))
diff --git a/src/lzss/lzsschain.nim b/src/lzss/lzsschain.nim
new file mode 100644
index 0000000..2ecff9e
--- /dev/null
+++ b/src/lzss/lzsschain.nim
@@ -0,0 +1,47 @@
1# gzip-like LZSS compressor
2# Copyright (C) 2018 Pacien TRAN-GIRARD
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU Affero General Public License as
6# published by the Free Software Foundation, either version 3 of the
7# License, or (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Affero General Public License for more details.
13#
14# You should have received a copy of the GNU Affero General Public License
15# along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17import lists, tables, sugar
18import ../integers, ../huffman/huffmantree
19import listpolyfill, lzssnode
20
21const maxChainByteLength = 32_000 * wordBitLength
22
23type LzssChain* =
24 SinglyLinkedList[LzssNode]
25
26proc lzssChain*(): LzssChain =
27 initSinglyLinkedList[LzssNode]()
28
29proc decode*(lzssChain: LzssChain): seq[uint8] =
30 result = newSeqOfCap[uint8](maxChainByteLength)
31 for node in lzssChain.items:
32 case node.kind:
33 of character:
34 result.add(node.character)
35 of reference:
36 let absolutePos = result.len - node.relativePos
37 result.add(result.toOpenArray(absolutePos, absolutePos + node.length - 1))
38
39proc stats*(lzssChain: LzssChain): tuple[characters: CountTableRef[uint8], lengths, positions: CountTableRef[int]] =
40 result = (newCountTable[uint8](), newCountTable[int](), newCountTable[int]())
41 for node in lzssChain.items:
42 case node.kind:
43 of character:
44 result.characters.inc(node.character)
45 of reference:
46 result.lengths.inc(node.length)
47 result.positions.inc(node.relativePos)
diff --git a/src/lzss/lzssencoder.nim b/src/lzss/lzssencoder.nim
new file mode 100644
index 0000000..8b750fb
--- /dev/null
+++ b/src/lzss/lzssencoder.nim
@@ -0,0 +1,58 @@
1# gzip-like LZSS compressor
2# Copyright (C) 2018 Pacien TRAN-GIRARD
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU Affero General Public License as
6# published by the Free Software Foundation, either version 3 of the
7# License, or (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Affero General Public License for more details.
13#
14# You should have received a copy of the GNU Affero General Public License
15# along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17import lists
18import listpolyfill, matchtable, lzssnode, lzsschain
19
20const matchGroupLength = 3
21const maxRefByteLength = high(uint8).int + matchGroupLength
22let emptySinglyLinkedList = initSinglyLinkedList[int]()
23
24proc commonPrefixLength*(a, b: openArray[uint8], skipFirst, maxLength: int): int =
25 result = skipFirst
26 let maxPrefixLength = min(min(a.len, b.len), maxLength)
27 while result < maxPrefixLength and a[result] == b[result]: result += 1
28
29proc longestPrefix*(candidatePos: SinglyLinkedList[int], searchBuf, lookAheadBuf: openArray[uint8]): tuple[length, pos: int] =
30 for startIndex in candidatePos.items:
31 let prefixLength = commonPrefixLength(
32 searchBuf.toOpenArray(startIndex, searchBuf.len - 1), lookAheadBuf, matchGroupLength, maxRefByteLength)
33 if prefixLength > result.length: result = (prefixLength, startIndex)
34 if prefixLength >= maxRefByteLength: return
35
36proc addGroups*(matchTable: MatchTable[seq[uint8], int], buffer: openArray[uint8], fromPosIncl, toPosExcl: int) =
37 for cursor in fromPosIncl..(toPosExcl - matchGroupLength):
38 let group = buffer[cursor..<(cursor + matchGroupLength)]
39 matchTable.addMatch(group, cursor)
40
41proc lzssEncode*(buf: openArray[uint8]): LzssChain =
42 result = initSinglyLinkedList[LzssNode]()
43 let matchTable = initMatchTable(seq[uint8], int)
44 var cursor = 0
45 while cursor < buf.len() - matchGroupLength:
46 let matches = matchTable.matchList(buf[cursor..<(cursor + matchGroupLength)])
47 let prefix = matches.longestPrefix(buf.toOpenArray(0, cursor - 1), buf.toOpenArray(cursor, buf.len - 1))
48 if prefix.length > 0:
49 result.append(lzssReference(prefix.length, cursor - prefix.pos))
50 cursor += prefix.length
51 else:
52 result.append(lzssCharacter(buf[cursor]))
53 cursor += 1
54 if cursor - prefix.length >= matchGroupLength:
55 matchTable.addGroups(buf, cursor - prefix.length - matchGroupLength, cursor)
56 while cursor < buf.len:
57 result.append(lzssCharacter(buf[cursor]))
58 cursor += 1
diff --git a/src/lzss/lzssnode.nim b/src/lzss/lzssnode.nim
new file mode 100644
index 0000000..de5958d
--- /dev/null
+++ b/src/lzss/lzssnode.nim
@@ -0,0 +1,39 @@
1# gzip-like LZSS compressor
2# Copyright (C) 2018 Pacien TRAN-GIRARD
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU Affero General Public License as
6# published by the Free Software Foundation, either version 3 of the
7# License, or (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Affero General Public License for more details.
13#
14# You should have received a copy of the GNU Affero General Public License
15# along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17type LzssNodeKind* = enum
18 character,
19 reference
20
21type LzssNode* = object
22 case kind*: LzssNodeKind
23 of character:
24 character*: uint8
25 of reference:
26 length*: int
27 relativePos*: int
28
29proc lzssCharacter*(value: uint8): LzssNode =
30 LzssNode(kind: character, character: value)
31
32proc lzssReference*(length, relativePos: int): LzssNode =
33 LzssNode(kind: reference, length: length, relativePos: relativePos)
34
35proc `==`*(a, b: LzssNode): bool =
36 if a.kind != b.kind: return false
37 case a.kind:
38 of character: a.character == b.character
39 of reference: a.length == b.length and a.relativePos == b.relativePos
diff --git a/src/lzss/matchtable.nim b/src/lzss/matchtable.nim
new file mode 100644
index 0000000..b17ce68
--- /dev/null
+++ b/src/lzss/matchtable.nim
@@ -0,0 +1,32 @@
1# gzip-like LZSS compressor
2# Copyright (C) 2018 Pacien TRAN-GIRARD
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU Affero General Public License as
6# published by the Free Software Foundation, either version 3 of the
7# License, or (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Affero General Public License for more details.
13#
14# You should have received a copy of the GNU Affero General Public License
15# along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17import tables, lists
18import listpolyfill
19
20type MatchTable*[K, V] =
21 TableRef[K, SinglyLinkedList[V]]
22
23proc initMatchTable*[K, V](keyType: typedesc[K], valueType: typedesc[V]): MatchTable[K, V] =
24 newTable[K, SinglyLinkedList[V]]()
25
26proc matchList*[K, V](matchTable: MatchTable[K, V], pattern: K): SinglyLinkedList[V] =
27 matchTable.getOrDefault(pattern, initSinglyLinkedList[V]())
28
29proc addMatch*[K, V](matchTable: MatchTable[K, V], pattern: K, value: V) =
30 var matchList = matchTable.matchList(pattern)
31 listpolyfill.prepend(matchList, value)
32 matchTable[pattern] = matchList