From 1524ab71168b7c214a531f796c94962776e9d88a Mon Sep 17 00:00:00 2001
From: pacien
Date: Thu, 29 Nov 2018 12:41:59 +0100
Subject: add generic huffman tree builder

---
Review notes (annotation only, not part of the original commit):
* HuffmanTreeNode gains a `weight` field. `huffmanLeaf` defaults it to 0
  and `huffmanBranch` sets it to the sum of its two children's weights.
* `==` now also compares weights at every node, whereas the new `~=` and
  `!~` operators compare only node kinds and leaf values.
* `<` orders nodes by weight; `symbolQueue` pushes one weighted leaf per
  entry of the count table into a HeapQueue.
* `buildHuffmanTree` pops two nodes and pushes their branch until a single
  node is left. It reads `symbolQueue[0]` unconditionally, so an empty
  count table is not handled (TODO: confirm the intended behaviour).
* `stats` on an LzssChain counts characters, reference lengths and
  reference positions into three separate count tables.
* Line breaks, indentation and the license URL were reconstructed from the
  hunk headers and the diffstat; verify against the repository before
  applying.

 src/huffmantree.nim    | 31 +++++++++++++++++++++++++++----
 src/lzsschain.nim      | 12 +++++++++++-
 tests/thuffmantree.nim | 33 +++++++++++++++++++++++++++++++--
 tests/tlzsschain.nim   | 18 +++++++++++++++---
 4 files changed, 84 insertions(+), 10 deletions(-)

diff --git a/src/huffmantree.nim b/src/huffmantree.nim
index 1711879..adcaec7 100644
--- a/src/huffmantree.nim
+++ b/src/huffmantree.nim
@@ -14,6 +14,7 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
+import tables, heapqueue
 import integers, bitreader, bitwriter
 
 const valueLengthFieldBitLength* = 6 # 64
@@ -28,19 +29,32 @@ type HuffmanTreeNode*[T: SomeUnsignedInt] = ref object
       left, right: HuffmanTreeNode[T]
     of leaf:
       value: T
+  weight: int
 
 proc huffmanBranch*[T](left, right: HuffmanTreeNode[T]): HuffmanTreeNode[T] =
-  HuffmanTreeNode[T](kind: branch, left: left, right: right)
+  HuffmanTreeNode[T](kind: branch, left: left, right: right, weight: left.weight + right.weight)
 
-proc huffmanLeaf*[T](value: T): HuffmanTreeNode[T] =
-  HuffmanTreeNode[T](kind: leaf, value: value)
+proc huffmanLeaf*[T](value: T, weight = 0): HuffmanTreeNode[T] =
+  HuffmanTreeNode[T](kind: leaf, value: value, weight: weight)
 
 proc `==`*[T](a, b: HuffmanTreeNode[T]): bool =
-  if a.kind != b.kind: return false
+  if a.kind != b.kind or a.weight != b.weight: return false
   case a.kind:
     of branch: a.left == b.left and a.right == b.right
     of leaf: a.value == b.value
 
+proc `~=`*[T](a, b: HuffmanTreeNode[T]): bool =
+  if a.kind != b.kind: return false
+  case a.kind:
+    of branch: a.left ~= b.left and a.right ~= b.right
+    of leaf: a.value == b.value
+
+proc `!~`*[T](a, b: HuffmanTreeNode[T]): bool =
+  not (a ~= b)
+
+proc `<`*[T](left, right: HuffmanTreeNode[T]): bool =
+  left.weight < right.weight
+
 proc maxValue*[T](node: HuffmanTreeNode[T]): T =
   case node.kind:
     of branch: max(node.left.maxValue(), node.right.maxValue())
@@ -68,3 +82,12 @@ proc serialise*[T](tree: HuffmanTreeNode[T], bitWriter: BitWriter) =
         bitWriter.writeBits(valueBitLength, node.value)
   bitWriter.writeBits(valueLengthFieldBitLength, valueBitLength.uint8)
   writeNode(tree)
+
+proc symbolQueue*[T](stats: CountTableRef[T]): HeapQueue[HuffmanTreeNode[T]] =
+  result = newHeapQueue[HuffmanTreeNode[T]]()
+  for item, count in stats.pairs: result.push(huffmanLeaf(item, count))
+
+proc buildHuffmanTree*[T: SomeUnsignedInt](stats: CountTableRef[T]): HuffmanTreeNode[T] =
+  var symbolQueue = symbolQueue(stats)
+  while symbolQueue.len > 1: symbolQueue.push(huffmanBranch(symbolQueue.pop(), symbolQueue.pop()))
+  result = symbolQueue[0]
diff --git a/src/lzsschain.nim b/src/lzsschain.nim
index 8203cb8..073aa5e 100644
--- a/src/lzsschain.nim
+++ b/src/lzsschain.nim
@@ -15,7 +15,7 @@
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
 import lists, tables, sugar
-import polyfill, integers, lzssnode
+import polyfill, integers, lzssnode, huffmantree
 
 const maxChainByteLength = 32_000 * wordBitLength
 
@@ -34,3 +34,13 @@ proc decode*(lzssChain: LzssChain): seq[uint8] =
       of reference:
         let absolutePos = result.len - node.relativePos
         result.add(result.toOpenArray(absolutePos, absolutePos + node.length - 1))
+
+proc stats*(lzssChain: LzssChain): tuple[characters: CountTableRef[uint8], lengths, positions: CountTableRef[int]] =
+  result = (newCountTable[uint8](), newCountTable[int](), newCountTable[int]())
+  for node in lzssChain.items:
+    case node.kind:
+      of character:
+        result.characters.inc(node.character)
+      of reference:
+        result.lengths.inc(node.length)
+        result.positions.inc(node.relativePos)
diff --git a/tests/thuffmantree.nim b/tests/thuffmantree.nim
index ec40bdb..705ac17 100644
--- a/tests/thuffmantree.nim
+++ b/tests/thuffmantree.nim
@@ -14,24 +14,43 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
-import unittest, streams
+import unittest, streams, sequtils, tables, heapqueue
 import bitreader, bitwriter, huffmantree
 
 suite "huffmantree":
+  let stats = newCountTable(concat(repeat(1'u, 3), repeat(2'u, 1), repeat(3'u, 2)))
   let tree = huffmanBranch(
     huffmanLeaf(1'u),
     huffmanBranch(
       huffmanLeaf(2'u),
       huffmanLeaf(3'u)))
 
+  test "equivalence":
+    check huffmanLeaf(12'u) ~= huffmanLeaf(12'u)
+    check huffmanLeaf(12'u) ~= huffmanLeaf(12'u, 2)
+    check huffmanLeaf(12'u) !~ huffmanLeaf(21'u)
+    check huffmanLeaf(12'u) !~ huffmanBranch(huffmanLeaf(12'u), huffmanLeaf(12'u))
+    check huffmanBranch(huffmanLeaf(12'u), huffmanLeaf(21'u)) ~= huffmanBranch(huffmanLeaf(12'u), huffmanLeaf(21'u))
+    check huffmanBranch(huffmanLeaf(12'u), huffmanLeaf(21'u)) !~ huffmanBranch(huffmanLeaf(12'u), huffmanLeaf(1'u))
+    check huffmanBranch(huffmanLeaf(12'u, 1), huffmanLeaf(21'u, 1)) ~= huffmanBranch(huffmanLeaf(12'u, 1), huffmanLeaf(21'u, 2))
+    check huffmanBranch(huffmanLeaf(12'u, 1), huffmanLeaf(21'u, 1)) !~ huffmanBranch(huffmanLeaf(12'u, 1), huffmanLeaf(12'u, 2))
+
   test "equality":
     check huffmanLeaf(12'u) == huffmanLeaf(12'u)
     check huffmanLeaf(12'u) != huffmanLeaf(21'u)
     check huffmanLeaf(12'u) != huffmanBranch(huffmanLeaf(12'u), huffmanLeaf(12'u))
     check huffmanBranch(huffmanLeaf(12'u), huffmanLeaf(21'u)) == huffmanBranch(huffmanLeaf(12'u), huffmanLeaf(21'u))
     check huffmanBranch(huffmanLeaf(12'u), huffmanLeaf(21'u)) != huffmanBranch(huffmanLeaf(12'u), huffmanLeaf(1'u))
+    check huffmanBranch(huffmanLeaf(12'u, 1), huffmanLeaf(21'u, 1)) == huffmanBranch(huffmanLeaf(12'u, 1), huffmanLeaf(21'u, 1))
+    check huffmanBranch(huffmanLeaf(12'u, 1), huffmanLeaf(21'u, 1)) != huffmanBranch(huffmanLeaf(12'u, 1), huffmanLeaf(21'u, 2))
     check tree == tree
 
+  test "weight comparison":
+    check huffmanLeaf(12'u, 1) < huffmanLeaf(12'u, 2)
+    check huffmanLeaf(12'u, 2) > huffmanLeaf(12'u, 1)
+    check huffmanLeaf(12'u, 1) < huffmanLeaf(12'u, 1) == false
+    check huffmanBranch(huffmanLeaf(12'u, 1), huffmanLeaf(21'u, 1)) < huffmanBranch(huffmanLeaf(12'u, 1), huffmanLeaf(21'u, 2))
+
   test "maxValue":
     check tree.maxValue() == 3
 
@@ -52,7 +71,7 @@ suite "huffmantree":
 
     stream.setPosition(0)
     let bitReader = stream.bitReader()
-    check huffmantree.deserialise(bitReader, uint) == tree
+    check huffmantree.deserialise(bitReader, uint) ~= tree
 
   test "serialise":
     let stream = newStringStream()
@@ -72,3 +91,13 @@ suite "huffmantree":
     check bitReader.readBits(2, uint8) == 2
     check bitReader.readBool() == true # 3 leaf
     check bitReader.readBits(2, uint8) == 3
+
+  test "symbolQueue":
+    var symbolQueue = symbolQueue(stats)
+    check symbolQueue.len == 3
+    check symbolQueue.pop() == huffmanLeaf(2'u, 1)
+    check symbolQueue.pop() == huffmanLeaf(3'u, 2)
+    check symbolQueue.pop() == huffmanLeaf(1'u, 3)
+
+  test "buildHuffmanTree":
+    check buildHuffmanTree(stats) ~= tree
diff --git a/tests/tlzsschain.nim b/tests/tlzsschain.nim
index 241a0f1..a8c2012 100644
--- a/tests/tlzsschain.nim
+++ b/tests/tlzsschain.nim
@@ -14,11 +14,11 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
-import unittest
+import unittest, sequtils, tables
 import polyfill, lzssnode, lzsschain
 
 suite "lzsschain":
-  test "decode":
+  proc chain(): LzssChain =
     let chainArray = [
       lzssCharacter(0), lzssCharacter(1), lzssCharacter(2),
       lzssCharacter(3), lzssCharacter(4), lzssCharacter(5),
@@ -27,4 +27,16 @@ suite "lzsschain":
       lzssReference(3, 3), lzssCharacter(5)]
     var chain = lzssChain()
     for node in chainArray: chain.append(node)
-    check chain.decode() == @[0'u8, 1, 2, 3, 4, 5, 0, 1, 2, 3, 0, 1, 4, 5, 0, 5, 5, 0, 5, 5]
+    result = chain
+
+  test "decode":
+    check chain().decode() == @[0'u8, 1, 2, 3, 4, 5, 0, 1, 2, 3, 0, 1, 4, 5, 0, 5, 5, 0, 5, 5]
+
+  test "stats":
+    let stats = chain().stats()
+    check stats.characters == newCountTable(concat(
+      repeat(0'u8, 2), repeat(1'u8, 2), repeat(2'u8, 1), repeat(3'u8, 1), repeat(4'u8, 1), repeat(5'u8, 3)))
+    check stats.lengths == newCountTable(concat(
+      repeat(3, 2), repeat(4, 1)))
+    check stats.positions == newCountTable(concat(
+      repeat(3, 1), repeat(6, 1), repeat(8, 1)))
-- 
cgit v1.2.3