From 3d44208aaaeca516eb08a90c98635543cae2bd4d Mon Sep 17 00:00:00 2001 From: pacien Date: Tue, 27 Nov 2018 20:26:35 +0100 Subject: implement lzss encoding --- src/lzsschain.nim | 36 +++++++++++++++++++++++++++++ src/lzssencoder.nim | 58 ++++++++++++++++++++++++++++++++++++++++++++++ src/lzssnode.nim | 39 +++++++++++++++++++++++++++++++ src/matchtable.nim | 32 ++++++++++++++++++++++++++ src/polyfill.nim | 42 ++++++++++++++++++++++++++++++++++ tests/tlzsschain.nim | 30 ++++++++++++++++++++++++ tests/tlzssencoder.nim | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++ tests/tlzssnode.nim | 26 +++++++++++++++++++++ tests/tmatchtable.nim | 35 ++++++++++++++++++++++++++++ tests/tpolyfill.nim | 27 ++++++++++++++++++++++ 10 files changed, 387 insertions(+) create mode 100644 src/lzsschain.nim create mode 100644 src/lzssencoder.nim create mode 100644 src/lzssnode.nim create mode 100644 src/matchtable.nim create mode 100644 src/polyfill.nim create mode 100644 tests/tlzsschain.nim create mode 100644 tests/tlzssencoder.nim create mode 100644 tests/tlzssnode.nim create mode 100644 tests/tmatchtable.nim create mode 100644 tests/tpolyfill.nim diff --git a/src/lzsschain.nim b/src/lzsschain.nim new file mode 100644 index 0000000..8203cb8 --- /dev/null +++ b/src/lzsschain.nim @@ -0,0 +1,36 @@ +# gzip-like LZSS compressor +# Copyright (C) 2018 Pacien TRAN-GIRARD +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. 
If not, see <https://www.gnu.org/licenses/>.
+
+import lists, tables, sugar
+import polyfill, integers, lzssnode
+
+# Initial capacity reserved for the output buffer of `decode`.
+const maxChainByteLength = 32_000 * wordBitLength
+
+# A chain is a singly linked list of LZSS nodes, in output order.
+type LzssChain* =
+  SinglyLinkedList[LzssNode]
+
+proc lzssChain*(): LzssChain =
+  ## Creates an empty chain.
+  initSinglyLinkedList[LzssNode]()
+
+proc decode*(lzssChain: LzssChain): seq[uint8] =
+  ## Expands `lzssChain` into the byte sequence it describes.
+  ##
+  ## `character` nodes contribute their byte as-is. A `reference` node
+  ## contributes `length` bytes read from the output produced so far, starting
+  ## `relativePos` bytes before the current end of the output.
+  ##
+  ## A reference pointing before the start of the output, or at its end
+  ## (`relativePos` of 0), fails the index check on the first byte read.
+  result = newSeqOfCap[uint8](maxChainByteLength)
+  for node in lzssChain.items:
+    case node.kind:
+      of character:
+        result.add(node.character)
+      of reference:
+        let absolutePos = result.len - node.relativePos
+        # Copy one byte at a time instead of appending a `toOpenArray` view of
+        # `result` to itself: the view may dangle if the append reallocates
+        # the buffer. Reading each byte by index also lets a reference overlap
+        # the bytes it is producing (`length` greater than `relativePos`).
+        for offset in 0..<node.length:
+          result.add(result[absolutePos + offset])
diff --git a/src/lzssencoder.nim b/src/lzssencoder.nim
new file mode 100644
index 0000000..05f3a16
--- /dev/null
+++ b/src/lzssencoder.nim
@@ -0,0 +1,58 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+import lists
+import polyfill, matchtable, lzssnode, lzsschain
+
+# Number of bytes in one match-table key.
+const matchGroupLength = 3
+# Upper bound on the length of a single match.
+const maxRefByteLength = high(uint8).int + matchGroupLength
+
+proc commonPrefixLength*(a, b: openArray[uint8], skipFirst, maxLength: int): int =
+  ## Returns the length of the common prefix of `a` and `b`, limited to
+  ## `maxLength` and to the length of the shorter input.
+  ##
+  ## The first `skipFirst` positions are not compared: the caller asserts that
+  ## they are already known to be equal. The starting point is clamped to the
+  ## comparable range, so the result never exceeds the length of either input
+  ## or `maxLength`, and is never negative.
+  let maxPrefixLength = min(min(a.len, b.len), maxLength)
+  result = max(0, min(skipFirst, maxPrefixLength))
+  while result < maxPrefixLength and a[result] == b[result]: result += 1
+
+proc longestPrefix*(candidatePos: SinglyLinkedList[int],
+                    searchBuf, lookAheadBuf: openArray[uint8]): tuple[length, pos: int] =
+  ## Among the start indices listed in `candidatePos`, finds the one from which
+  ## `searchBuf` shares the longest prefix with `lookAheadBuf`.
+  ##
+  ## The first `matchGroupLength` bytes of every candidate are taken to match
+  ## and are not compared again. On equal lengths the candidate listed first
+  ## wins. Returns `(0, 0)` when no candidate yields a non-empty prefix.
+  for startIndex in candidatePos.items:
+    let prefixLength = commonPrefixLength(
+      searchBuf.toOpenArray(startIndex, searchBuf.len - 1), lookAheadBuf,
+      matchGroupLength, maxRefByteLength)
+    if prefixLength > result.length: result = (prefixLength, startIndex)
+    # A match of maximal length cannot be improved upon: stop scanning.
+    if prefixLength >= maxRefByteLength: return
+
+proc addGroups*(matchTable: MatchTable[seq[uint8], int],
+                buffer: openArray[uint8], fromPosIncl, toPosExcl: int) =
+  ## Registers in `matchTable` every group of `matchGroupLength` consecutive
+  ## bytes of `buffer` that starts at or after `fromPosIncl` and lies entirely
+  ## before `toPosExcl`. Each group is keyed by its bytes and maps to its
+  ## start index.
+  for cursor in fromPosIncl..(toPosExcl - matchGroupLength):
+    let group = buffer[cursor..<(cursor + matchGroupLength)]
+    matchTable.addMatch(group, cursor)
+
+proc lzssEncode*(buf: openArray[uint8]): LzssChain =
+  ## Encodes `buf` as a chain of literal bytes and back-references.
+  ##
+  ## At each position the group of `matchGroupLength` bytes under the cursor is
+  ## looked up in a table of groups seen earlier. When an earlier occurrence
+  ## exists, a reference covering the longest common run is emitted; otherwise
+  ## the byte is emitted as a literal. A reference never extends past the
+  ## cursor position at which it is emitted.
+  result = initSinglyLinkedList[LzssNode]()
+  let matchTable = initMatchTable(seq[uint8], int)
+  var cursor = 0
+  # `<=` rather than `<`: a match is still attempted when exactly one full
+  # group remains, so a trailing repeated group becomes a reference.
+  while cursor <= buf.len - matchGroupLength:
+    let stepStart = cursor
+    let matches = matchTable.matchList(buf[cursor..<(cursor + matchGroupLength)])
+    let prefix = matches.longestPrefix(
+      buf.toOpenArray(0, cursor - 1), buf.toOpenArray(cursor, buf.len - 1))
+    if prefix.length > 0:
+      result.append(lzssReference(prefix.length, cursor - prefix.pos))
+      cursor += prefix.length
+    else:
+      result.append(lzssCharacter(buf[cursor]))
+      cursor += 1
+    # Every group lying entirely before `stepStart` was registered by an
+    # earlier iteration. Register only the groups that became complete during
+    # this step, so that no position is listed twice as a candidate.
+    matchTable.addGroups(buf, max(0, stepStart - matchGroupLength + 1), cursor)
+  # Fewer bytes than one group remain: emit them as literals.
+  while cursor < buf.len:
+    result.append(lzssCharacter(buf[cursor]))
+    cursor += 1
diff --git a/src/lzssnode.nim
b/src/lzssnode.nim
new file mode 100644
index 0000000..de5958d
--- /dev/null
+++ b/src/lzssnode.nim
@@ -0,0 +1,39 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+type
+  # Discriminates the two shapes a node can take.
+  LzssNodeKind* = enum
+    character,
+    reference
+
+  # One element of an LZSS chain: either a single byte, or a pair
+  # (length, relativePos).
+  LzssNode* = object
+    case kind*: LzssNodeKind
+      of character:
+        character*: uint8
+      of reference:
+        length*: int
+        relativePos*: int
+
+proc lzssCharacter*(value: uint8): LzssNode =
+  ## Builds a `character` node holding `value`.
+  result = LzssNode(kind: character, character: value)
+
+proc lzssReference*(length, relativePos: int): LzssNode =
+  ## Builds a `reference` node from its `length` and `relativePos`.
+  result = LzssNode(kind: reference, length: length, relativePos: relativePos)
+
+proc `==`*(a, b: LzssNode): bool =
+  ## Two nodes are equal when they are of the same kind and all the fields of
+  ## that kind are equal.
+  if a.kind == b.kind:
+    case a.kind:
+      of character:
+        result = a.character == b.character
+      of reference:
+        result = a.length == b.length and a.relativePos == b.relativePos
diff --git a/src/matchtable.nim b/src/matchtable.nim
new file mode 100644
index 0000000..5be652c
--- /dev/null
+++ b/src/matchtable.nim
@@ -0,0 +1,32 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+import tables, lists
+import polyfill
+
+# Maps a pattern to the list of values recorded for it.
+type MatchTable*[K, V] =
+  TableRef[K, SinglyLinkedList[V]]
+
+proc initMatchTable*[K, V](keyType: typedesc[K], valueType: typedesc[V]): MatchTable[K, V] =
+  ## Creates an empty table; `keyType` and `valueType` only select the
+  ## generic parameters.
+  result = newTable[K, SinglyLinkedList[V]]()
+
+proc matchList*[K, V](matchTable: MatchTable[K, V], pattern: K): SinglyLinkedList[V] =
+  ## Returns the values recorded for `pattern`, or an empty list when the
+  ## pattern is unknown.
+  let noMatch = initSinglyLinkedList[V]()
+  result = matchTable.getOrDefault(pattern, noMatch)
+
+proc addMatch*[K, V](matchTable: MatchTable[K, V], pattern: K, value: V) =
+  ## Records `value` for `pattern`, placing it ahead of the values recorded
+  ## earlier for the same pattern.
+  var recorded = matchTable.getOrDefault(pattern, initSinglyLinkedList[V]())
+  polyfill.prepend(recorded, value)
+  # The list is handled as a value: store the updated copy back.
+  matchTable[pattern] = recorded
diff --git a/src/polyfill.nim b/src/polyfill.nim
new file mode 100644
index 0000000..b252953
--- /dev/null
+++ b/src/polyfill.nim
@@ -0,0 +1,42 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+import lists
+
+# https://github.com/nim-lang/Nim/pull/9805
+
+proc prepend*[T](L: var SinglyLinkedList[T], n: SinglyLinkedNode[T]) =
+  ## Inserts the node `n` at the front of `L`. Efficiency: O(1).
+  n.next = L.head
+  L.head = n
+  # First node of a previously empty list: it is also the tail.
+  if L.tail.isNil:
+    L.tail = n
+
+proc prepend*[T](L: var SinglyLinkedList[T], value: T) =
+  ## Inserts `value` at the front of `L`. Efficiency: O(1).
+  let node = newSinglyLinkedNode(value)
+  polyfill.prepend(L, node)
+
+proc append*[T](L: var SinglyLinkedList[T], n: SinglyLinkedNode[T]) =
+  ## Adds the node `n` at the back of `L`. Efficiency: O(1).
+  n.next = nil
+  if not L.tail.isNil:
+    assert(L.tail.next == nil)
+    L.tail.next = n
+  L.tail = n
+  # First node of a previously empty list: it is also the head.
+  if L.head.isNil:
+    L.head = n
+
+proc append*[T](L: var SinglyLinkedList[T], value: T) =
+  ## Adds `value` at the back of `L`. Efficiency: O(1).
+  let node = newSinglyLinkedNode(value)
+  append(L, node)
diff --git a/tests/tlzsschain.nim b/tests/tlzsschain.nim
new file mode 100644
index 0000000..241a0f1
--- /dev/null
+++ b/tests/tlzsschain.nim
@@ -0,0 +1,30 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+import unittest
+import polyfill, lzssnode, lzsschain
+
+suite "lzsschain":
+  test "decode":
+    # Six literals followed by three back-references interleaved with
+    # literals.
+    let nodes = [
+      lzssCharacter(0), lzssCharacter(1), lzssCharacter(2),
+      lzssCharacter(3), lzssCharacter(4), lzssCharacter(5),
+      lzssReference(4, 6), lzssCharacter(0), lzssCharacter(1),
+      lzssReference(3, 8), lzssCharacter(5),
+      lzssReference(3, 3), lzssCharacter(5)]
+    let expected = @[0'u8, 1, 2, 3, 4, 5, 0, 1, 2, 3, 0, 1, 4, 5, 0, 5, 5, 0, 5, 5]
+    var chain = lzssChain()
+    for node in nodes:
+      chain.append(node)
+    check chain.decode() == expected
diff --git a/tests/tlzssencoder.nim b/tests/tlzssencoder.nim
new file mode 100644
index 0000000..253d0ac
--- /dev/null
+++ b/tests/tlzssencoder.nim
@@ -0,0 +1,62 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+import unittest, sequtils, lists, tables
+import matchtable, lzssnode, lzsschain, lzssencoder
+
+suite "lzssencoder":
+  test "commonPrefixLength":
+    # Empty inputs share an empty prefix.
+    check commonPrefixLength([], [], 0, 10) == 0
+    # The shorter input bounds the result, with or without skipped bytes.
+    check commonPrefixLength([1'u8, 2], [1'u8, 2, 3], 0, 10) == 2
+    check commonPrefixLength([1'u8, 2], [1'u8, 2, 3], 1, 10) == 2
+    # Comparison stops at the first differing byte.
+    check commonPrefixLength([1'u8, 2, 3], [1'u8, 2, 4], 1, 10) == 2
+    # `maxLength` bounds the result.
+    check commonPrefixLength([1'u8, 2, 3, 4], [1'u8, 2, 3, 4], 1, 3) == 3
+
+  test "longestPrefix":
+    let buffer = [
+      0'u8, 1, 2, 9,
+      0, 1, 2, 3,
+      0, 1, 2,
+      0, 1, 2, 3, 4]
+    # Candidates are the three earlier positions where `0, 1, 2` starts.
+    var candidates = initSinglyLinkedList[int]()
+    for startIndex in [0, 4, 8]:
+      candidates.prepend(startIndex)
+    let longest = longestPrefix(
+      candidates, buffer.toOpenArray(0, 10), buffer.toOpenArray(11, buffer.len - 1))
+    check longest.pos == 4
+    check longest.length == 4
+
+  test "addGroups":
+    let matchTable = initMatchTable(seq[uint8], int)
+    let buffer = toSeq(0'u8..10'u8)
+    # A range shorter than one group registers nothing.
+    matchTable.addGroups(buffer, 0, 1)
+    check matchTable.len == 0
+    matchTable.addGroups(buffer, 2, 9)
+    check matchTable.len == 5
+    # Groups outside of the requested range are absent.
+    check toSeq(matchTable.matchList(@[1'u8, 2, 3]).items).len == 0
+    check toSeq(matchTable.matchList(@[7'u8, 8, 9]).items).len == 0
+    # Groups inside of the range map to their start index.
+    check toSeq(matchTable.matchList(@[2'u8, 3, 4]).items) == @[2]
+    check toSeq(matchTable.matchList(@[4'u8, 5, 6]).items) == @[4]
+    check toSeq(matchTable.matchList(@[6'u8, 7, 8]).items) == @[6]
+
+  test "lzssEncode":
+    let buffer = [0'u8, 1, 2, 3, 4, 5, 0, 1, 2, 3, 0, 1, 4, 5, 0, 5, 5, 0, 5, 5]
+    let expected = @[
+      lzssCharacter(0), lzssCharacter(1), lzssCharacter(2),
+      lzssCharacter(3), lzssCharacter(4), lzssCharacter(5),
+      lzssReference(4, 6), lzssCharacter(0), lzssCharacter(1),
+      lzssReference(3, 8), lzssCharacter(5),
+      lzssReference(3, 3), lzssCharacter(5)]
+    check toSeq(lzssEncode(buffer).items) == expected
diff --git a/tests/tlzssnode.nim b/tests/tlzssnode.nim
new file mode 100644
index 0000000..cb584ab
--- /dev/null
+++ b/tests/tlzssnode.nim
@@ -0,0 +1,26 @@
+# gzip-like LZSS
compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+import unittest
+import lzssnode
+
+suite "lzssnode":
+  test "equality":
+    # Same kind: equality follows the fields of that kind.
+    check lzssCharacter(1) == lzssCharacter(1)
+    check lzssCharacter(0) != lzssCharacter(1)
+    check lzssReference(0, 1) == lzssReference(0, 1)
+    check lzssReference(1, 0) != lzssReference(0, 1)
+    # Different kinds are never equal.
+    check lzssCharacter(0) != lzssReference(0, 1)
diff --git a/tests/tmatchtable.nim b/tests/tmatchtable.nim
new file mode 100644
index 0000000..4b21f1d
--- /dev/null
+++ b/tests/tmatchtable.nim
@@ -0,0 +1,35 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+import unittest, lists, sequtils, tables
+import matchtable
+
+suite "matchtable":
+  test "matchList":
+    # An unknown pattern yields an empty list.
+    let matchTable = initMatchTable(seq[int], int)
+    check toSeq(matchTable.matchList(@[0, 1, 2]).items).len == 0
+
+  test "addMatch":
+    let matchTable = initMatchTable(seq[int], int)
+    let ascending = @[0, 1, 2]
+    let descending = @[2, 1, 0]
+    matchTable.addMatch(ascending, 42)
+    matchTable.addMatch(descending, 24)
+    check matchTable.len == 2
+    check toSeq(matchTable.matchList(ascending).items) == @[42]
+    check toSeq(matchTable.matchList(descending).items) == @[24]
+    # A second value for a known pattern is listed first and adds no key.
+    matchTable.addMatch(ascending, 1337)
+    check matchTable.len == 2
+    check toSeq(matchTable.matchList(ascending).items) == @[1337, 42]
+    check toSeq(matchTable.matchList(descending).items) == @[24]
diff --git a/tests/tpolyfill.nim b/tests/tpolyfill.nim
new file mode 100644
index 0000000..b48eb77
--- /dev/null
+++ b/tests/tpolyfill.nim
@@ -0,0 +1,27 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+import unittest, sugar, lists, tables
+import polyfill
+
+suite "polyfill":
+  test "SinglyLinkedList append":
+    const data = [1, 2, 3, 4, 5, 6]
+    var list: SinglyLinkedList[int]
+    # Prepending reverses the data; appending keeps its order.
+    for value in items(data):
+      polyfill.prepend(list, value)
+    for value in items(data):
+      polyfill.append(list, value)
+    check $list == "[6, 5, 4, 3, 2, 1, 1, 2, 3, 4, 5, 6]"
+    check 4 in list
-- 
cgit v1.2.3