From 3d44208aaaeca516eb08a90c98635543cae2bd4d Mon Sep 17 00:00:00 2001 From: pacien Date: Tue, 27 Nov 2018 20:26:35 +0100 Subject: implement lzss encoding --- src/lzsschain.nim | 36 +++++++++++++++++++++++++++++++++ src/lzssencoder.nim | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++ src/lzssnode.nim | 39 +++++++++++++++++++++++++++++++++++ src/matchtable.nim | 32 +++++++++++++++++++++++++++++ src/polyfill.nim | 42 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 207 insertions(+) create mode 100644 src/lzsschain.nim create mode 100644 src/lzssencoder.nim create mode 100644 src/lzssnode.nim create mode 100644 src/matchtable.nim create mode 100644 src/polyfill.nim (limited to 'src') diff --git a/src/lzsschain.nim b/src/lzsschain.nim new file mode 100644 index 0000000..8203cb8 --- /dev/null +++ b/src/lzsschain.nim @@ -0,0 +1,36 @@ +# gzip-like LZSS compressor +# Copyright (C) 2018 Pacien TRAN-GIRARD +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import lists, tables, sugar +import polyfill, integers, lzssnode + +const maxChainByteLength = 32_000 * wordBitLength + +type LzssChain* = + SinglyLinkedList[LzssNode] + +proc lzssChain*(): LzssChain = + initSinglyLinkedList[LzssNode]() + +proc decode*(lzssChain: LzssChain): seq[uint8] = + result = newSeqOfCap[uint8](maxChainByteLength) + for node in lzssChain.items: + case node.kind: + of character: + result.add(node.character) + of reference: + let absolutePos = result.len - node.relativePos + result.add(result.toOpenArray(absolutePos, absolutePos + node.length - 1)) diff --git a/src/lzssencoder.nim b/src/lzssencoder.nim new file mode 100644 index 0000000..05f3a16 --- /dev/null +++ b/src/lzssencoder.nim @@ -0,0 +1,58 @@ +# gzip-like LZSS compressor +# Copyright (C) 2018 Pacien TRAN-GIRARD +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import lists +import polyfill, matchtable, lzssnode, lzsschain + +const matchGroupLength = 3 +const maxRefByteLength = high(uint8).int + matchGroupLength +let emptySinglyLinkedList = initSinglyLinkedList[int]() + +proc commonPrefixLength*(a, b: openArray[uint8], skipFirst, maxLength: int): int = + result = skipFirst + let maxPrefixLength = min(min(a.len, b.len), maxLength) + while result < maxPrefixLength and a[result] == b[result]: result += 1 + +proc longestPrefix*(candidatePos: SinglyLinkedList[int], searchBuf, lookAheadBuf: openArray[uint8]): tuple[length, pos: int] = + for startIndex in candidatePos.items: + let prefixLength = commonPrefixLength( + searchBuf.toOpenArray(startIndex, searchBuf.len - 1), lookAheadBuf, matchGroupLength, maxRefByteLength) + if prefixLength > result.length: result = (prefixLength, startIndex) + if prefixLength >= maxRefByteLength: return + +proc addGroups*(matchTable: MatchTable[seq[uint8], int], buffer: openArray[uint8], fromPosIncl, toPosExcl: int) = + for cursor in fromPosIncl..(toPosExcl - matchGroupLength): + let group = buffer[cursor..<(cursor + matchGroupLength)] + matchTable.addMatch(group, cursor) + +proc lzssEncode*(buf: openArray[uint8]): LzssChain = + result = initSinglyLinkedList[LzssNode]() + let matchTable = initMatchTable(seq[uint8], int) + var cursor = 0 + while cursor < buf.len() - matchGroupLength: + let matches = matchTable.matchList(buf[cursor..<(cursor + matchGroupLength)]) + let prefix = matches.longestPrefix(buf.toOpenArray(0, cursor - 1), buf.toOpenArray(cursor, buf.len - 1)) + if prefix.length > 0: + result.append(lzssReference(prefix.length, cursor - prefix.pos)) + cursor += prefix.length + else: + result.append(lzssCharacter(buf[cursor])) + cursor += 1 + if cursor - prefix.length >= matchGroupLength: + matchTable.addGroups(buf, cursor - prefix.length - matchGroupLength, cursor) + while cursor < buf.len: + result.append(lzssCharacter(buf[cursor])) + cursor += 1 diff --git a/src/lzssnode.nim b/src/lzssnode.nim new file mode 100644 index 0000000..de5958d --- /dev/null +++ b/src/lzssnode.nim @@ -0,0 +1,39 @@ +# gzip-like LZSS compressor +# Copyright (C) 2018 Pacien TRAN-GIRARD +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +type LzssNodeKind* = enum + character, + reference + +type LzssNode* = object + case kind*: LzssNodeKind + of character: + character*: uint8 + of reference: + length*: int + relativePos*: int + +proc lzssCharacter*(value: uint8): LzssNode = + LzssNode(kind: character, character: value) + +proc lzssReference*(length, relativePos: int): LzssNode = + LzssNode(kind: reference, length: length, relativePos: relativePos) + +proc `==`*(a, b: LzssNode): bool = + if a.kind != b.kind: return false + case a.kind: + of character: a.character == b.character + of reference: a.length == b.length and a.relativePos == b.relativePos diff --git a/src/matchtable.nim b/src/matchtable.nim new file mode 100644 index 0000000..5be652c --- /dev/null +++ b/src/matchtable.nim @@ -0,0 +1,32 @@ +# gzip-like LZSS compressor +# Copyright (C) 2018 Pacien TRAN-GIRARD +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import tables, lists +import polyfill + +type MatchTable*[K, V] = + TableRef[K, SinglyLinkedList[V]] + +proc initMatchTable*[K, V](keyType: typedesc[K], valueType: typedesc[V]): MatchTable[K, V] = + newTable[K, SinglyLinkedList[V]]() + +proc matchList*[K, V](matchTable: MatchTable[K, V], pattern: K): SinglyLinkedList[V] = + matchTable.getOrDefault(pattern, initSinglyLinkedList[V]()) + +proc addMatch*[K, V](matchTable: MatchTable[K, V], pattern: K, value: V) = + var matchList = matchTable.matchList(pattern) + polyfill.prepend(matchList, value) + matchTable[pattern] = matchList diff --git a/src/polyfill.nim b/src/polyfill.nim new file mode 100644 index 0000000..b252953 --- /dev/null +++ b/src/polyfill.nim @@ -0,0 +1,42 @@ +# gzip-like LZSS compressor +# Copyright (C) 2018 Pacien TRAN-GIRARD +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import lists + +# https://github.com/nim-lang/Nim/pull/9805 + +proc prepend*[T](L: var SinglyLinkedList[T], n: SinglyLinkedNode[T]) = + ## prepends a node to `L`. Efficiency: O(1). + n.next = L.head + L.head = n + if L.tail == nil: L.tail = n + +proc prepend*[T](L: var SinglyLinkedList[T], value: T) = + ## prepends a node to `L`. Efficiency: O(1). + polyfill.prepend(L, newSinglyLinkedNode(value)) + +proc append*[T](L: var SinglyLinkedList[T], n: SinglyLinkedNode[T]) = + ## appends a node `n` to `L`. Efficiency: O(1). + n.next = nil + if L.tail != nil: + assert(L.tail.next == nil) + L.tail.next = n + L.tail = n + if L.head == nil: L.head = n + +proc append*[T](L: var SinglyLinkedList[T], value: T) = + ## appends a value to `L`. Efficiency: O(1). + append(L, newSinglyLinkedNode(value)) -- cgit v1.2.3