From 1524ab71168b7c214a531f796c94962776e9d88a Mon Sep 17 00:00:00 2001
From: pacien
Date: Thu, 29 Nov 2018 12:41:59 +0100
Subject: add generic huffman tree builder
---
src/huffmantree.nim | 31 +++++++++++++++++++++++++++----
src/lzsschain.nim | 12 +++++++++++-
tests/thuffmantree.nim | 33 +++++++++++++++++++++++++++++++--
tests/tlzsschain.nim | 18 +++++++++++++++---
4 files changed, 84 insertions(+), 10 deletions(-)
diff --git a/src/huffmantree.nim b/src/huffmantree.nim
index 1711879..adcaec7 100644
--- a/src/huffmantree.nim
+++ b/src/huffmantree.nim
@@ -14,6 +14,7 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
+import tables, heapqueue
import integers, bitreader, bitwriter
const valueLengthFieldBitLength* = 6 # 64
@@ -28,19 +29,32 @@ type HuffmanTreeNode*[T: SomeUnsignedInt] = ref object
left, right: HuffmanTreeNode[T]
of leaf:
value: T
+ weight: int
proc huffmanBranch*[T](left, right: HuffmanTreeNode[T]): HuffmanTreeNode[T] =
- HuffmanTreeNode[T](kind: branch, left: left, right: right)
+ HuffmanTreeNode[T](kind: branch, left: left, right: right, weight: left.weight + right.weight)
-proc huffmanLeaf*[T](value: T): HuffmanTreeNode[T] =
- HuffmanTreeNode[T](kind: leaf, value: value)
+proc huffmanLeaf*[T](value: T, weight = 0): HuffmanTreeNode[T] =
+ HuffmanTreeNode[T](kind: leaf, value: value, weight: weight)
proc `==`*[T](a, b: HuffmanTreeNode[T]): bool =
- if a.kind != b.kind: return false
+ if a.kind != b.kind or a.weight != b.weight: return false
case a.kind:
of branch: a.left == b.left and a.right == b.right
of leaf: a.value == b.value
+proc `~=`*[T](a, b: HuffmanTreeNode[T]): bool =
+  ## Structural equivalence of two trees: node kinds and leaf values must
+  ## match recursively, while node weights are not compared at all.
+  if a.kind == b.kind:
+    case a.kind:
+    of branch: result = (a.left ~= b.left) and (a.right ~= b.right)
+    of leaf: result = a.value == b.value
+
+proc `!~`*[T](a, b: HuffmanTreeNode[T]): bool =
+  ## Negation of `~=`: true when the two trees are not equivalent.
+  let equivalent = a ~= b
+  result = not equivalent
+
+proc `<`*[T](left, right: HuffmanTreeNode[T]): bool =
+  ## Orders nodes by their weight only; values and children are ignored.
+  result = right.weight > left.weight
+
proc maxValue*[T](node: HuffmanTreeNode[T]): T =
case node.kind:
of branch: max(node.left.maxValue(), node.right.maxValue())
@@ -68,3 +82,12 @@ proc serialise*[T](tree: HuffmanTreeNode[T], bitWriter: BitWriter) =
bitWriter.writeBits(valueBitLength, node.value)
bitWriter.writeBits(valueLengthFieldBitLength, valueBitLength.uint8)
writeNode(tree)
+
+proc symbolQueue*[T](stats: CountTableRef[T]): HeapQueue[HuffmanTreeNode[T]] =
+  ## Builds a heap queue holding one leaf per counted symbol, using the
+  ## symbol's count as the leaf weight.
+  result = newHeapQueue[HuffmanTreeNode[T]]()
+  for symbol, occurrences in stats.pairs:
+    let leafNode = huffmanLeaf(symbol, occurrences)
+    result.push(leafNode)
+
+proc buildHuffmanTree*[T: SomeUnsignedInt](stats: CountTableRef[T]): HuffmanTreeNode[T] =
+  ## Builds a Huffman tree from symbol occurrence statistics.
+  ##
+  ## The two lightest nodes of the queue are repeatedly merged into a branch
+  ## (first popped on the left, second popped on the right) until a single
+  ## root remains. A table with a single symbol yields a lone leaf.
+  ##
+  ## Raises `ValueError` when `stats` holds no symbol, instead of failing
+  ## with an index error on the empty queue.
+  var queue = symbolQueue(stats)
+  if queue.len == 0:
+    raise newException(ValueError,
+      "cannot build a Huffman tree from empty statistics")
+  while queue.len > 1:
+    # Pop order matters: it decides which node becomes the left child.
+    let lightest = queue.pop()
+    let nextLightest = queue.pop()
+    queue.push(huffmanBranch(lightest, nextLightest))
+  result = queue[0]
diff --git a/src/lzsschain.nim b/src/lzsschain.nim
index 8203cb8..073aa5e 100644
--- a/src/lzsschain.nim
+++ b/src/lzsschain.nim
@@ -15,7 +15,7 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import lists, tables, sugar
-import polyfill, integers, lzssnode
+import polyfill, integers, lzssnode, huffmantree
const maxChainByteLength = 32_000 * wordBitLength
@@ -34,3 +34,13 @@ proc decode*(lzssChain: LzssChain): seq[uint8] =
of reference:
let absolutePos = result.len - node.relativePos
result.add(result.toOpenArray(absolutePos, absolutePos + node.length - 1))
+
+proc stats*(lzssChain: LzssChain): tuple[characters: CountTableRef[uint8], lengths, positions: CountTableRef[int]] =
+  ## Counts the occurrences of each literal character, of each reference
+  ## length and of each reference relative position found in the chain.
+  let characterCounts = newCountTable[uint8]()
+  let lengthCounts = newCountTable[int]()
+  let positionCounts = newCountTable[int]()
+  for node in lzssChain.items:
+    case node.kind:
+    of character:
+      characterCounts.inc(node.character)
+    of reference:
+      lengthCounts.inc(node.length)
+      positionCounts.inc(node.relativePos)
+  result = (characterCounts, lengthCounts, positionCounts)
diff --git a/tests/thuffmantree.nim b/tests/thuffmantree.nim
index ec40bdb..705ac17 100644
--- a/tests/thuffmantree.nim
+++ b/tests/thuffmantree.nim
@@ -14,24 +14,43 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
-import unittest, streams
+import unittest, streams, sequtils, tables, heapqueue
import bitreader, bitwriter, huffmantree
suite "huffmantree":
+ let stats = newCountTable(concat(repeat(1'u, 3), repeat(2'u, 1), repeat(3'u, 2)))
let tree = huffmanBranch(
huffmanLeaf(1'u),
huffmanBranch(
huffmanLeaf(2'u),
huffmanLeaf(3'u)))
+ test "equivalence":
+ check huffmanLeaf(12'u) ~= huffmanLeaf(12'u)
+ check huffmanLeaf(12'u) ~= huffmanLeaf(12'u, 2)
+ check huffmanLeaf(12'u) !~ huffmanLeaf(21'u)
+ check huffmanLeaf(12'u) !~ huffmanBranch(huffmanLeaf(12'u), huffmanLeaf(12'u))
+ check huffmanBranch(huffmanLeaf(12'u), huffmanLeaf(21'u)) ~= huffmanBranch(huffmanLeaf(12'u), huffmanLeaf(21'u))
+ check huffmanBranch(huffmanLeaf(12'u), huffmanLeaf(21'u)) !~ huffmanBranch(huffmanLeaf(12'u), huffmanLeaf(1'u))
+ check huffmanBranch(huffmanLeaf(12'u, 1), huffmanLeaf(21'u, 1)) ~= huffmanBranch(huffmanLeaf(12'u, 1), huffmanLeaf(21'u, 2))
+ check huffmanBranch(huffmanLeaf(12'u, 1), huffmanLeaf(21'u, 1)) !~ huffmanBranch(huffmanLeaf(12'u, 1), huffmanLeaf(12'u, 2))
+
test "equality":
check huffmanLeaf(12'u) == huffmanLeaf(12'u)
check huffmanLeaf(12'u) != huffmanLeaf(21'u)
check huffmanLeaf(12'u) != huffmanBranch(huffmanLeaf(12'u), huffmanLeaf(12'u))
check huffmanBranch(huffmanLeaf(12'u), huffmanLeaf(21'u)) == huffmanBranch(huffmanLeaf(12'u), huffmanLeaf(21'u))
check huffmanBranch(huffmanLeaf(12'u), huffmanLeaf(21'u)) != huffmanBranch(huffmanLeaf(12'u), huffmanLeaf(1'u))
+ check huffmanBranch(huffmanLeaf(12'u, 1), huffmanLeaf(21'u, 1)) == huffmanBranch(huffmanLeaf(12'u, 1), huffmanLeaf(21'u, 1))
+ check huffmanBranch(huffmanLeaf(12'u, 1), huffmanLeaf(21'u, 1)) != huffmanBranch(huffmanLeaf(12'u, 1), huffmanLeaf(21'u, 2))
check tree == tree
+ test "weight comparison":
+ check huffmanLeaf(12'u, 1) < huffmanLeaf(12'u, 2)
+ check huffmanLeaf(12'u, 2) > huffmanLeaf(12'u, 1)
+ check huffmanLeaf(12'u, 1) < huffmanLeaf(12'u, 1) == false
+ check huffmanBranch(huffmanLeaf(12'u, 1), huffmanLeaf(21'u, 1)) < huffmanBranch(huffmanLeaf(12'u, 1), huffmanLeaf(21'u, 2))
+
test "maxValue":
check tree.maxValue() == 3
@@ -52,7 +71,7 @@ suite "huffmantree":
stream.setPosition(0)
let bitReader = stream.bitReader()
- check huffmantree.deserialise(bitReader, uint) == tree
+ check huffmantree.deserialise(bitReader, uint) ~= tree
test "serialise":
let stream = newStringStream()
@@ -72,3 +91,13 @@ suite "huffmantree":
check bitReader.readBits(2, uint8) == 2
check bitReader.readBool() == true # 3 leaf
check bitReader.readBits(2, uint8) == 3
+
+ test "symbolQueue":
+ var symbolQueue = symbolQueue(stats)
+ check symbolQueue.len == 3
+ check symbolQueue.pop() == huffmanLeaf(2'u, 1)
+ check symbolQueue.pop() == huffmanLeaf(3'u, 2)
+ check symbolQueue.pop() == huffmanLeaf(1'u, 3)
+
+ test "buildHuffmanTree":
+ check buildHuffmanTree(stats) ~= tree
diff --git a/tests/tlzsschain.nim b/tests/tlzsschain.nim
index 241a0f1..a8c2012 100644
--- a/tests/tlzsschain.nim
+++ b/tests/tlzsschain.nim
@@ -14,11 +14,11 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
-import unittest
+import unittest, sequtils, tables
import polyfill, lzssnode, lzsschain
suite "lzsschain":
- test "decode":
+ proc chain(): LzssChain =
let chainArray = [
lzssCharacter(0), lzssCharacter(1), lzssCharacter(2),
lzssCharacter(3), lzssCharacter(4), lzssCharacter(5),
@@ -27,4 +27,16 @@ suite "lzsschain":
lzssReference(3, 3), lzssCharacter(5)]
var chain = lzssChain()
for node in chainArray: chain.append(node)
- check chain.decode() == @[0'u8, 1, 2, 3, 4, 5, 0, 1, 2, 3, 0, 1, 4, 5, 0, 5, 5, 0, 5, 5]
+ result = chain
+
+ test "decode":
+ check chain().decode() == @[0'u8, 1, 2, 3, 4, 5, 0, 1, 2, 3, 0, 1, 4, 5, 0, 5, 5, 0, 5, 5]
+
+ test "stats":
+ let stats = chain().stats()
+ check stats.characters == newCountTable(concat(
+ repeat(0'u8, 2), repeat(1'u8, 2), repeat(2'u8, 1), repeat(3'u8, 1), repeat(4'u8, 1), repeat(5'u8, 3)))
+ check stats.lengths == newCountTable(concat(
+ repeat(3, 2), repeat(4, 1)))
+ check stats.positions == newCountTable(concat(
+ repeat(3, 1), repeat(6, 1), repeat(8, 1)))
--
cgit v1.2.3