From d661132528d5c27148a0b55d52709ce97124000a Mon Sep 17 00:00:00 2001
From: pacien
Date: Wed, 28 Nov 2018 15:20:14 +0100
Subject: add huffman tree structure and serialisation
---
src/huffmantree.nim | 70 +++++++++++++++++++++++++++++++++++++++++++++++
src/integers.nim | 6 ++++
tests/thuffmantree.nim | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++
tests/tintegers.nim | 4 +++
4 files changed, 154 insertions(+)
create mode 100644 src/huffmantree.nim
create mode 100644 tests/thuffmantree.nim
diff --git a/src/huffmantree.nim b/src/huffmantree.nim
new file mode 100644
index 0000000..1711879
--- /dev/null
+++ b/src/huffmantree.nim
@@ -0,0 +1,70 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+import integers, bitreader, bitwriter
+
+const valueLengthFieldBitLength* = 6 # bit width of the serialised header field holding the leaf value bit length (encodes 0..63)
+
+type HuffmanTreeNodeKind* = enum # discriminator of the HuffmanTreeNode variant
+ branch, # inner node holding a left and a right child
+ leaf # terminal node holding a value
+
+type HuffmanTreeNode*[T: SomeUnsignedInt] = ref object # binary tree node, held by reference
+  case kind: HuffmanTreeNodeKind # selects which of the fields below exist
+  of branch:
+    left, right: HuffmanTreeNode[T] # the two child subtrees
+  of leaf:
+    value: T # unsigned value carried by this leaf
+
+proc huffmanBranch*[T](left, right: HuffmanTreeNode[T]): HuffmanTreeNode[T] =
+  result = HuffmanTreeNode[T](kind: branch, left: left, right: right) # inner node over two subtrees
+
+proc huffmanLeaf*[T](value: T): HuffmanTreeNode[T] =
+  result = HuffmanTreeNode[T](kind: leaf, value: value) # terminal node wrapping `value`
+
+proc `==`*[T](a, b: HuffmanTreeNode[T]): bool =
+  # Structural equality; nil-safe: nil equals only nil (guard runs before any .kind read).
+  if a.isNil or b.isNil or a.kind != b.kind: return a.isNil and b.isNil
+  if a.kind == leaf: a.value == b.value
+  else: a.left == b.left and a.right == b.right
+
+proc maxValue*[T](node: HuffmanTreeNode[T]): T =
+  # Greatest leaf value found anywhere in the subtree rooted at `node`.
+  if node.kind == leaf: node.value
+  else: max(node.left.maxValue(), node.right.maxValue())
+
+proc deserialise*[T](bitReader: BitReader, valueType: typedesc[T]): HuffmanTreeNode[T] =
+  # Header first: the bit width shared by every leaf value that follows.
+  let leafBits = bitReader.readBits(valueLengthFieldBitLength, uint8).int
+  proc readNode(): HuffmanTreeNode[T] = # one flag bit, then either a value or two subtrees
+    if bitReader.readBool(): huffmanLeaf(bitReader.readBits(leafBits, valueType))
+    else: huffmanBranch(readNode(), readNode()) # first call yields the left child
+  readNode()
+
+proc serialise*[T](tree: HuffmanTreeNode[T], bitWriter: BitWriter) =
+  # Writes the leaf bit width as a header, then the tree in pre-order; fails if the width cannot be encoded.
+  let valueBitLength = tree.maxValue().bitLength()
+  doAssert valueBitLength < (1 shl valueLengthFieldBitLength), "value bit length does not fit the header field"
+  proc writeNode(node: HuffmanTreeNode[T]) =
+    case node.kind:
+    of branch:
+      bitWriter.writeBool(false) # a 0 flag marks a branch
+      writeNode(node.left); writeNode(node.right) # left subtree is emitted first
+    of leaf:
+      bitWriter.writeBool(true) # a 1 flag marks a leaf; its value follows
+      bitWriter.writeBits(valueBitLength, node.value)
+  bitWriter.writeBits(valueLengthFieldBitLength, valueBitLength.uint8)
+  writeNode(tree)
diff --git a/src/integers.nim b/src/integers.nim
index 7b0f166..c93c9b8 100644
--- a/src/integers.nim
+++ b/src/integers.nim
@@ -22,6 +22,12 @@ proc `/^`*[T: Natural](x, y: T): T =
proc truncateToUint8*(x: SomeUnsignedInt): uint8 =
(x and uint8.high).uint8
+proc bitLength*[T: SomeUnsignedInt](x: T): int =
+  var rest = x # bits not yet counted; 0 input leaves the result at 0
+  while rest != 0.T:
+    inc result
+    rest = rest shr 1
+
proc leastSignificantBits*[T: SomeUnsignedInt](x: T, bits: int): T =
let maskOffset = sizeof(T) * wordBitLength - bits
if maskOffset >= 0: (x shl maskOffset) shr maskOffset else: x
diff --git a/tests/thuffmantree.nim b/tests/thuffmantree.nim
new file mode 100644
index 0000000..ec40bdb
--- /dev/null
+++ b/tests/thuffmantree.nim
@@ -0,0 +1,74 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+import unittest, streams
+import bitreader, bitwriter, huffmantree
+
+suite "huffmantree":
+  let tree = huffmanBranch( # shared fixture: (1, (2, 3))
+    huffmanLeaf(1'u),
+    huffmanBranch(
+      huffmanLeaf(2'u),
+      huffmanLeaf(3'u)))
+
+  test "equality":
+    check huffmanLeaf(12'u) == huffmanLeaf(12'u)
+    check huffmanLeaf(12'u) != huffmanLeaf(21'u)
+    check huffmanLeaf(12'u) != huffmanBranch(huffmanLeaf(12'u), huffmanLeaf(12'u))
+    check huffmanBranch(huffmanLeaf(12'u), huffmanLeaf(21'u)) == huffmanBranch(huffmanLeaf(12'u), huffmanLeaf(21'u))
+    check huffmanBranch(huffmanLeaf(12'u), huffmanLeaf(21'u)) != huffmanBranch(huffmanLeaf(12'u), huffmanLeaf(1'u))
+    check tree == tree
+
+  test "maxValue":
+    check tree.maxValue() == 3
+
+  test "deserialise":
+    let stream = newStringStream()
+    defer: stream.close()
+    let bitWriter = stream.bitWriter()
+    bitWriter.writeBits(valueLengthFieldBitLength, 2'u8) # header: leaf values are 2 bits wide
+    bitWriter.writeBool(false) # root
+    bitWriter.writeBool(true) # 1 leaf
+    bitWriter.writeBits(2, 1'u)
+    bitWriter.writeBool(false) # right branch
+    bitWriter.writeBool(true) # 2 leaf
+    bitWriter.writeBits(2, 2'u)
+    bitWriter.writeBool(true) # 3 leaf
+    bitWriter.writeBits(2, 3'u)
+    bitWriter.flush()
+
+    stream.setPosition(0) # rewind so the reader starts at the header
+    let bitReader = stream.bitReader()
+    check huffmantree.deserialise(bitReader, uint) == tree
+
+  test "serialise":
+    let stream = newStringStream()
+    defer: stream.close()
+    let bitWriter = stream.bitWriter()
+    tree.serialise(bitWriter)
+    bitWriter.flush()
+
+    stream.setPosition(0) # rewind so the reader starts at the header
+    let bitReader = stream.bitReader()
+    check bitReader.readBits(valueLengthFieldBitLength, uint8) == 2 # max value 3 needs 2 bits
+    check bitReader.readBool() == false # root
+    check bitReader.readBool() == true # 1 leaf
+    check bitReader.readBits(2, uint8) == 1
+    check bitReader.readBool() == false # right branch
+    check bitReader.readBool() == true # 2 leaf
+    check bitReader.readBits(2, uint8) == 2
+    check bitReader.readBool() == true # 3 leaf
+    check bitReader.readBits(2, uint8) == 3
diff --git a/tests/tintegers.nim b/tests/tintegers.nim
index 956e4aa..851e926 100644
--- a/tests/tintegers.nim
+++ b/tests/tintegers.nim
@@ -27,6 +27,10 @@ suite "integers":
check truncateToUint8(0x00FA'u16) == 0xFA'u8
check truncateToUint8(0xFFFA'u16) == 0xFA'u8
+ test "bitLength":
+   check bitLength(0b1_1111) == 5 # five significant bits
+   check bitLength(0b1000_0000) == 8 # position of the highest set bit, counted from 1
+
test "leastSignificantBits":
check leastSignificantBits(0xFF'u8, 3) == 0b0000_0111'u8
check leastSignificantBits(0b0001_0101'u8, 3) == 0b0000_0101'u8
--
cgit v1.2.3