From 3d44208aaaeca516eb08a90c98635543cae2bd4d Mon Sep 17 00:00:00 2001
From: pacien
Date: Tue, 27 Nov 2018 20:26:35 +0100
Subject: implement lzss encoding
---
src/lzsschain.nim | 36 +++++++++++++++++++++++++++++
src/lzssencoder.nim | 58 ++++++++++++++++++++++++++++++++++++++++++++++
src/lzssnode.nim | 39 +++++++++++++++++++++++++++++++
src/matchtable.nim | 32 ++++++++++++++++++++++++++
src/polyfill.nim | 42 ++++++++++++++++++++++++++++++++++
tests/tlzsschain.nim | 30 ++++++++++++++++++++++++
tests/tlzssencoder.nim | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++
tests/tlzssnode.nim | 26 +++++++++++++++++++++
tests/tmatchtable.nim | 35 ++++++++++++++++++++++++++++
tests/tpolyfill.nim | 27 ++++++++++++++++++++++
10 files changed, 387 insertions(+)
create mode 100644 src/lzsschain.nim
create mode 100644 src/lzssencoder.nim
create mode 100644 src/lzssnode.nim
create mode 100644 src/matchtable.nim
create mode 100644 src/polyfill.nim
create mode 100644 tests/tlzsschain.nim
create mode 100644 tests/tlzssencoder.nim
create mode 100644 tests/tlzssnode.nim
create mode 100644 tests/tmatchtable.nim
create mode 100644 tests/tpolyfill.nim
diff --git a/src/lzsschain.nim b/src/lzsschain.nim
new file mode 100644
index 0000000..8203cb8
--- /dev/null
+++ b/src/lzsschain.nim
@@ -0,0 +1,36 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see .
+
+import lists, tables, sugar
+import polyfill, integers, lzssnode
+
+const maxChainByteLength = 32_000 * wordBitLength
+
+type LzssChain* =
+ SinglyLinkedList[LzssNode]
+
+proc lzssChain*(): LzssChain =
+ initSinglyLinkedList[LzssNode]()
+
+proc decode*(lzssChain: LzssChain): seq[uint8] =
+ result = newSeqOfCap[uint8](maxChainByteLength)
+ for node in lzssChain.items:
+ case node.kind:
+ of character:
+ result.add(node.character)
+ of reference:
+ let absolutePos = result.len - node.relativePos
+ result.add(result.toOpenArray(absolutePos, absolutePos + node.length - 1))
diff --git a/src/lzssencoder.nim b/src/lzssencoder.nim
new file mode 100644
index 0000000..05f3a16
--- /dev/null
+++ b/src/lzssencoder.nim
@@ -0,0 +1,58 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see .
+
+import lists
+import polyfill, matchtable, lzssnode, lzsschain
+
+const matchGroupLength = 3
+const maxRefByteLength = high(uint8).int + matchGroupLength
+let emptySinglyLinkedList = initSinglyLinkedList[int]()
+
+proc commonPrefixLength*(a, b: openArray[uint8], skipFirst, maxLength: int): int =
+ result = skipFirst
+ let maxPrefixLength = min(min(a.len, b.len), maxLength)
+ while result < maxPrefixLength and a[result] == b[result]: result += 1
+
+proc longestPrefix*(candidatePos: SinglyLinkedList[int], searchBuf, lookAheadBuf: openArray[uint8]): tuple[length, pos: int] =
+ for startIndex in candidatePos.items:
+ let prefixLength = commonPrefixLength(
+ searchBuf.toOpenArray(startIndex, searchBuf.len - 1), lookAheadBuf, matchGroupLength, maxRefByteLength)
+ if prefixLength > result.length: result = (prefixLength, startIndex)
+ if prefixLength >= maxRefByteLength: return
+
+proc addGroups*(matchTable: MatchTable[seq[uint8], int], buffer: openArray[uint8], fromPosIncl, toPosExcl: int) =
+ for cursor in fromPosIncl..(toPosExcl - matchGroupLength):
+ let group = buffer[cursor..<(cursor + matchGroupLength)]
+ matchTable.addMatch(group, cursor)
+
+proc lzssEncode*(buf: openArray[uint8]): LzssChain =
+ result = initSinglyLinkedList[LzssNode]()
+ let matchTable = initMatchTable(seq[uint8], int)
+ var cursor = 0
+ while cursor < buf.len() - matchGroupLength:
+ let matches = matchTable.matchList(buf[cursor..<(cursor + matchGroupLength)])
+ let prefix = matches.longestPrefix(buf.toOpenArray(0, cursor - 1), buf.toOpenArray(cursor, buf.len - 1))
+ if prefix.length > 0:
+ result.append(lzssReference(prefix.length, cursor - prefix.pos))
+ cursor += prefix.length
+ else:
+ result.append(lzssCharacter(buf[cursor]))
+ cursor += 1
+ if cursor - prefix.length >= matchGroupLength:
+ matchTable.addGroups(buf, cursor - prefix.length - matchGroupLength, cursor)
+ while cursor < buf.len:
+ result.append(lzssCharacter(buf[cursor]))
+ cursor += 1
diff --git a/src/lzssnode.nim b/src/lzssnode.nim
new file mode 100644
index 0000000..de5958d
--- /dev/null
+++ b/src/lzssnode.nim
@@ -0,0 +1,39 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see .
+
+type LzssNodeKind* = enum
+ character,
+ reference
+
+type LzssNode* = object
+ case kind*: LzssNodeKind
+ of character:
+ character*: uint8
+ of reference:
+ length*: int
+ relativePos*: int
+
+proc lzssCharacter*(value: uint8): LzssNode =
+ LzssNode(kind: character, character: value)
+
+proc lzssReference*(length, relativePos: int): LzssNode =
+ LzssNode(kind: reference, length: length, relativePos: relativePos)
+
+proc `==`*(a, b: LzssNode): bool =
+ if a.kind != b.kind: return false
+ case a.kind:
+ of character: a.character == b.character
+ of reference: a.length == b.length and a.relativePos == b.relativePos
diff --git a/src/matchtable.nim b/src/matchtable.nim
new file mode 100644
index 0000000..5be652c
--- /dev/null
+++ b/src/matchtable.nim
@@ -0,0 +1,32 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see .
+
+import tables, lists
+import polyfill
+
+type MatchTable*[K, V] =
+ TableRef[K, SinglyLinkedList[V]]
+
+proc initMatchTable*[K, V](keyType: typedesc[K], valueType: typedesc[V]): MatchTable[K, V] =
+ newTable[K, SinglyLinkedList[V]]()
+
+proc matchList*[K, V](matchTable: MatchTable[K, V], pattern: K): SinglyLinkedList[V] =
+ matchTable.getOrDefault(pattern, initSinglyLinkedList[V]())
+
+proc addMatch*[K, V](matchTable: MatchTable[K, V], pattern: K, value: V) =
+ var matchList = matchTable.matchList(pattern)
+ polyfill.prepend(matchList, value)
+ matchTable[pattern] = matchList
diff --git a/src/polyfill.nim b/src/polyfill.nim
new file mode 100644
index 0000000..b252953
--- /dev/null
+++ b/src/polyfill.nim
@@ -0,0 +1,42 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see .
+
+import lists
+
+# https://github.com/nim-lang/Nim/pull/9805
+
+proc prepend*[T](L: var SinglyLinkedList[T], n: SinglyLinkedNode[T]) =
+ ## prepends a node to `L`. Efficiency: O(1).
+ n.next = L.head
+ L.head = n
+ if L.tail == nil: L.tail = n
+
+proc prepend*[T](L: var SinglyLinkedList[T], value: T) =
+ ## prepends a node to `L`. Efficiency: O(1).
+ polyfill.prepend(L, newSinglyLinkedNode(value))
+
+proc append*[T](L: var SinglyLinkedList[T], n: SinglyLinkedNode[T]) =
+ ## appends a node `n` to `L`. Efficiency: O(1).
+ n.next = nil
+ if L.tail != nil:
+ assert(L.tail.next == nil)
+ L.tail.next = n
+ L.tail = n
+ if L.head == nil: L.head = n
+
+proc append*[T](L: var SinglyLinkedList[T], value: T) =
+ ## appends a value to `L`. Efficiency: O(1).
+ append(L, newSinglyLinkedNode(value))
diff --git a/tests/tlzsschain.nim b/tests/tlzsschain.nim
new file mode 100644
index 0000000..241a0f1
--- /dev/null
+++ b/tests/tlzsschain.nim
@@ -0,0 +1,30 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see .
+
+import unittest
+import polyfill, lzssnode, lzsschain
+
+suite "lzsschain":
+ test "decode":
+ let chainArray = [
+ lzssCharacter(0), lzssCharacter(1), lzssCharacter(2),
+ lzssCharacter(3), lzssCharacter(4), lzssCharacter(5),
+ lzssReference(4, 6), lzssCharacter(0), lzssCharacter(1),
+ lzssReference(3, 8), lzssCharacter(5),
+ lzssReference(3, 3), lzssCharacter(5)]
+ var chain = lzssChain()
+ for node in chainArray: chain.append(node)
+ check chain.decode() == @[0'u8, 1, 2, 3, 4, 5, 0, 1, 2, 3, 0, 1, 4, 5, 0, 5, 5, 0, 5, 5]
diff --git a/tests/tlzssencoder.nim b/tests/tlzssencoder.nim
new file mode 100644
index 0000000..253d0ac
--- /dev/null
+++ b/tests/tlzssencoder.nim
@@ -0,0 +1,62 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see .
+
+import unittest, sequtils, lists, tables
+import matchtable, lzssnode, lzsschain, lzssencoder
+
+suite "lzssencoder":
+ test "commonPrefixLength":
+ check commonPrefixLength([], [], 0, 10) == 0
+ check commonPrefixLength([1'u8, 2], [1'u8, 2, 3], 0, 10) == 2
+ check commonPrefixLength([1'u8, 2], [1'u8, 2, 3], 1, 10) == 2
+ check commonPrefixLength([1'u8, 2, 3], [1'u8, 2, 4], 1, 10) == 2
+ check commonPrefixLength([1'u8, 2, 3, 4], [1'u8, 2, 3, 4], 1, 3) == 3
+
+ test "longestPrefix":
+ let buffer = [
+ 0'u8, 1, 2, 9,
+ 0, 1, 2, 3,
+ 0, 1, 2,
+ 0, 1, 2, 3, 4]
+ var candidatePos = initSinglyLinkedList[int]()
+ candidatePos.prepend(0)
+ candidatePos.prepend(4)
+ candidatePos.prepend(8)
+ let result = longestPrefix(candidatePos, buffer.toOpenArray(0, 10), buffer.toOpenArray(11, buffer.len - 1))
+ check result.pos == 4
+ check result.length == 4
+
+ test "addGroups":
+ let matchTable = initMatchTable(seq[uint8], int)
+ let buffer = toSeq(0'u8..10'u8)
+ matchTable.addGroups(buffer, 0, 1)
+ check matchTable.len == 0
+ matchTable.addGroups(buffer, 2, 9)
+ check matchTable.len == 5
+ check toSeq(matchTable.matchList(@[1'u8, 2, 3]).items).len == 0
+ check toSeq(matchTable.matchList(@[7'u8, 8, 9]).items).len == 0
+ check toSeq(matchTable.matchList(@[2'u8, 3, 4]).items) == @[2]
+ check toSeq(matchTable.matchList(@[4'u8, 5, 6]).items) == @[4]
+ check toSeq(matchTable.matchList(@[6'u8, 7, 8]).items) == @[6]
+
+ test "lzssEncode":
+ let buffer = [0'u8, 1, 2, 3, 4, 5, 0, 1, 2, 3, 0, 1, 4, 5, 0, 5, 5, 0, 5, 5]
+ check toSeq(lzssEncode(buffer).items) == @[
+ lzssCharacter(0), lzssCharacter(1), lzssCharacter(2),
+ lzssCharacter(3), lzssCharacter(4), lzssCharacter(5),
+ lzssReference(4, 6), lzssCharacter(0), lzssCharacter(1),
+ lzssReference(3, 8), lzssCharacter(5),
+ lzssReference(3, 3), lzssCharacter(5)]
diff --git a/tests/tlzssnode.nim b/tests/tlzssnode.nim
new file mode 100644
index 0000000..cb584ab
--- /dev/null
+++ b/tests/tlzssnode.nim
@@ -0,0 +1,26 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see .
+
+import unittest
+import lzssnode
+
+suite "lzssnode":
+ test "equality":
+ check lzssCharacter(1) == lzssCharacter(1)
+ check lzssCharacter(0) != lzssCharacter(1)
+ check lzssReference(0, 1) == lzssReference(0, 1)
+ check lzssReference(1, 0) != lzssReference(0, 1)
+ check lzssCharacter(0) != lzssReference(0, 1)
diff --git a/tests/tmatchtable.nim b/tests/tmatchtable.nim
new file mode 100644
index 0000000..4b21f1d
--- /dev/null
+++ b/tests/tmatchtable.nim
@@ -0,0 +1,35 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see .
+
+import unittest, lists, sequtils, tables
+import matchtable
+
+suite "matchtable":
+ test "matchList":
+ let matchTable = initMatchTable(seq[int], int)
+ check toSeq(matchTable.matchList(@[0, 1, 2]).items).len == 0
+
+ test "addMatch":
+ let matchTable = initMatchTable(seq[int], int)
+ matchTable.addMatch(@[0, 1, 2], 42)
+ matchTable.addMatch(@[2, 1, 0], 24)
+ check matchTable.len == 2
+ check toSeq(matchTable.matchList(@[0, 1, 2]).items) == @[42]
+ check toSeq(matchTable.matchList(@[2, 1, 0]).items) == @[24]
+ matchTable.addMatch(@[0, 1, 2], 1337)
+ check matchTable.len == 2
+ check toSeq(matchTable.matchList(@[0, 1, 2]).items) == @[1337, 42]
+ check toSeq(matchTable.matchList(@[2, 1, 0]).items) == @[24]
diff --git a/tests/tpolyfill.nim b/tests/tpolyfill.nim
new file mode 100644
index 0000000..b48eb77
--- /dev/null
+++ b/tests/tpolyfill.nim
@@ -0,0 +1,27 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see .
+
+import unittest, sugar, lists, tables
+import polyfill
+
+suite "polyfill":
+ test "SinglyLinkedList append":
+ const data = [1, 2, 3, 4, 5, 6]
+ var L: SinglyLinkedList[int]
+ for d in items(data): polyfill.prepend(L, d)
+ for d in items(data): polyfill.append(L, d)
+ check $L == "[6, 5, 4, 3, 2, 1, 1, 2, 3, 4, 5, 6]"
+ check 4 in L
--
cgit v1.2.3