From 3d44208aaaeca516eb08a90c98635543cae2bd4d Mon Sep 17 00:00:00 2001
From: pacien
Date: Tue, 27 Nov 2018 20:26:35 +0100
Subject: implement lzss encoding
---
src/lzsschain.nim | 36 +++++++++++++++++++++++++++++++++
src/lzssencoder.nim | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++
src/lzssnode.nim | 39 +++++++++++++++++++++++++++++++++++
src/matchtable.nim | 32 +++++++++++++++++++++++++++++
src/polyfill.nim | 42 ++++++++++++++++++++++++++++++++++++++
5 files changed, 207 insertions(+)
create mode 100644 src/lzsschain.nim
create mode 100644 src/lzssencoder.nim
create mode 100644 src/lzssnode.nim
create mode 100644 src/matchtable.nim
create mode 100644 src/polyfill.nim
(limited to 'src')
diff --git a/src/lzsschain.nim b/src/lzsschain.nim
new file mode 100644
index 0000000..8203cb8
--- /dev/null
+++ b/src/lzsschain.nim
@@ -0,0 +1,36 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see .
+
+import lists, tables, sugar
+import polyfill, integers, lzssnode
+
+const maxChainByteLength = 32_000 * wordBitLength
+
+type LzssChain* =
+ SinglyLinkedList[LzssNode]
+
+proc lzssChain*(): LzssChain =
+ initSinglyLinkedList[LzssNode]()
+
+proc decode*(lzssChain: LzssChain): seq[uint8] =
+ result = newSeqOfCap[uint8](maxChainByteLength)
+ for node in lzssChain.items:
+ case node.kind:
+ of character:
+ result.add(node.character)
+ of reference:
+ let absolutePos = result.len - node.relativePos
+ result.add(result.toOpenArray(absolutePos, absolutePos + node.length - 1))
diff --git a/src/lzssencoder.nim b/src/lzssencoder.nim
new file mode 100644
index 0000000..05f3a16
--- /dev/null
+++ b/src/lzssencoder.nim
@@ -0,0 +1,58 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see .
+
+import lists
+import polyfill, matchtable, lzssnode, lzsschain
+
+const matchGroupLength = 3
+const maxRefByteLength = high(uint8).int + matchGroupLength
+let emptySinglyLinkedList = initSinglyLinkedList[int]()
+
+proc commonPrefixLength*(a, b: openArray[uint8], skipFirst, maxLength: int): int =
+ result = skipFirst
+ let maxPrefixLength = min(min(a.len, b.len), maxLength)
+ while result < maxPrefixLength and a[result] == b[result]: result += 1
+
+proc longestPrefix*(candidatePos: SinglyLinkedList[int], searchBuf, lookAheadBuf: openArray[uint8]): tuple[length, pos: int] =
+ for startIndex in candidatePos.items:
+ let prefixLength = commonPrefixLength(
+ searchBuf.toOpenArray(startIndex, searchBuf.len - 1), lookAheadBuf, matchGroupLength, maxRefByteLength)
+ if prefixLength > result.length: result = (prefixLength, startIndex)
+ if prefixLength >= maxRefByteLength: return
+
+proc addGroups*(matchTable: MatchTable[seq[uint8], int], buffer: openArray[uint8], fromPosIncl, toPosExcl: int) =
+ for cursor in fromPosIncl..(toPosExcl - matchGroupLength):
+ let group = buffer[cursor..<(cursor + matchGroupLength)]
+ matchTable.addMatch(group, cursor)
+
+proc lzssEncode*(buf: openArray[uint8]): LzssChain =
+ result = initSinglyLinkedList[LzssNode]()
+ let matchTable = initMatchTable(seq[uint8], int)
+ var cursor = 0
+ while cursor < buf.len() - matchGroupLength:
+ let matches = matchTable.matchList(buf[cursor..<(cursor + matchGroupLength)])
+ let prefix = matches.longestPrefix(buf.toOpenArray(0, cursor - 1), buf.toOpenArray(cursor, buf.len - 1))
+ if prefix.length > 0:
+ result.append(lzssReference(prefix.length, cursor - prefix.pos))
+ cursor += prefix.length
+ else:
+ result.append(lzssCharacter(buf[cursor]))
+ cursor += 1
+ if cursor - prefix.length >= matchGroupLength:
+ matchTable.addGroups(buf, cursor - prefix.length - matchGroupLength, cursor)
+ while cursor < buf.len:
+ result.append(lzssCharacter(buf[cursor]))
+ cursor += 1
diff --git a/src/lzssnode.nim b/src/lzssnode.nim
new file mode 100644
index 0000000..de5958d
--- /dev/null
+++ b/src/lzssnode.nim
@@ -0,0 +1,39 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see .
+
+type LzssNodeKind* = enum
+ character,
+ reference
+
+type LzssNode* = object
+ case kind*: LzssNodeKind
+ of character:
+ character*: uint8
+ of reference:
+ length*: int
+ relativePos*: int
+
+proc lzssCharacter*(value: uint8): LzssNode =
+ LzssNode(kind: character, character: value)
+
+proc lzssReference*(length, relativePos: int): LzssNode =
+ LzssNode(kind: reference, length: length, relativePos: relativePos)
+
+proc `==`*(a, b: LzssNode): bool =
+ if a.kind != b.kind: return false
+ case a.kind:
+ of character: a.character == b.character
+ of reference: a.length == b.length and a.relativePos == b.relativePos
diff --git a/src/matchtable.nim b/src/matchtable.nim
new file mode 100644
index 0000000..5be652c
--- /dev/null
+++ b/src/matchtable.nim
@@ -0,0 +1,32 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see .
+
+import tables, lists
+import polyfill
+
+type MatchTable*[K, V] =
+ TableRef[K, SinglyLinkedList[V]]
+
+proc initMatchTable*[K, V](keyType: typedesc[K], valueType: typedesc[V]): MatchTable[K, V] =
+ newTable[K, SinglyLinkedList[V]]()
+
+proc matchList*[K, V](matchTable: MatchTable[K, V], pattern: K): SinglyLinkedList[V] =
+ matchTable.getOrDefault(pattern, initSinglyLinkedList[V]())
+
+proc addMatch*[K, V](matchTable: MatchTable[K, V], pattern: K, value: V) =
+ var matchList = matchTable.matchList(pattern)
+ polyfill.prepend(matchList, value)
+ matchTable[pattern] = matchList
diff --git a/src/polyfill.nim b/src/polyfill.nim
new file mode 100644
index 0000000..b252953
--- /dev/null
+++ b/src/polyfill.nim
@@ -0,0 +1,42 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see .
+
+import lists
+
+# https://github.com/nim-lang/Nim/pull/9805
+
+proc prepend*[T](L: var SinglyLinkedList[T], n: SinglyLinkedNode[T]) =
+ ## prepends a node to `L`. Efficiency: O(1).
+ n.next = L.head
+ L.head = n
+ if L.tail == nil: L.tail = n
+
+proc prepend*[T](L: var SinglyLinkedList[T], value: T) =
+ ## prepends a node to `L`. Efficiency: O(1).
+ polyfill.prepend(L, newSinglyLinkedNode(value))
+
+proc append*[T](L: var SinglyLinkedList[T], n: SinglyLinkedNode[T]) =
+ ## appends a node `n` to `L`. Efficiency: O(1).
+ n.next = nil
+ if L.tail != nil:
+ assert(L.tail.next == nil)
+ L.tail.next = n
+ L.tail = n
+ if L.head == nil: L.head = n
+
+proc append*[T](L: var SinglyLinkedList[T], value: T) =
+ ## appends a value to `L`. Efficiency: O(1).
+ append(L, newSinglyLinkedNode(value))
--
cgit v1.2.3