From d661132528d5c27148a0b55d52709ce97124000a Mon Sep 17 00:00:00 2001
From: pacien
Date: Wed, 28 Nov 2018 15:20:14 +0100
Subject: add huffman tree structure and serialisation

---
 src/huffmantree.nim    | 70 +++++++++++++++++++++++++++++++++++++++++++++++
 src/integers.nim       |  6 ++++
 tests/thuffmantree.nim | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++
 tests/tintegers.nim    |  4 +++
 4 files changed, 154 insertions(+)
 create mode 100644 src/huffmantree.nim
 create mode 100644 tests/thuffmantree.nim

diff --git a/src/huffmantree.nim b/src/huffmantree.nim
new file mode 100644
index 0000000..1711879
--- /dev/null
+++ b/src/huffmantree.nim
@@ -0,0 +1,70 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+import integers, bitreader, bitwriter
+
+const valueLengthFieldBitLength* = 6 # header width in bits: 2^6 = 64 values (0..63)
+
+type HuffmanTreeNodeKind* = enum
+  branch, # inner node with a left and a right child
+  leaf # terminal node carrying a value
+
+type HuffmanTreeNode*[T: SomeUnsignedInt] = ref object
+  case kind: HuffmanTreeNodeKind
+  of branch:
+    left, right: HuffmanTreeNode[T]
+  of leaf:
+    value: T
+
+proc huffmanBranch*[T](left, right: HuffmanTreeNode[T]): HuffmanTreeNode[T] =
+  HuffmanTreeNode[T](kind: branch, left: left, right: right)
+
+proc huffmanLeaf*[T](value: T): HuffmanTreeNode[T] =
+  HuffmanTreeNode[T](kind: leaf, value: value)
+
+proc `==`*[T](a, b: HuffmanTreeNode[T]): bool = # structural (deep) equality
+  if a.kind != b.kind: return false # NOTE(review): nil operands are not guarded
+  case a.kind:
+  of branch: a.left == b.left and a.right == b.right
+  of leaf: a.value == b.value
+
+proc maxValue*[T](node: HuffmanTreeNode[T]): T = # largest leaf value below node
+  case node.kind:
+  of branch: max(node.left.maxValue(), node.right.maxValue())
+  of leaf: node.value
+
+proc deserialise*[T](bitReader: BitReader, valueType: typedesc[T]): HuffmanTreeNode[T] =
+  let valueBitLength = bitReader.readBits(valueLengthFieldBitLength, uint8).int
+  proc readNode(): HuffmanTreeNode[T] = # reads one node and its descendants
+    case bitReader.readBool(): # leading flag bit: false = branch, true = leaf
+    of false: huffmanBranch(readNode(), readNode())
+    of true: huffmanLeaf(bitReader.readBits(valueBitLength, valueType))
+  readNode() # the length header read above is followed by the root node
+
+proc serialise*[T](tree: HuffmanTreeNode[T], bitWriter: BitWriter) =
+  let maxValue = tree.maxValue()
+  let valueBitLength = maxValue.bitLength() # NOTE(review): can be 64; field max is 63
+  proc writeNode(node: HuffmanTreeNode[T]) = # flag bit, then children or value
+    case node.kind:
+    of branch:
+      bitWriter.writeBool(false)
+      writeNode(node.left)
+      writeNode(node.right)
+    of leaf:
+      bitWriter.writeBool(true)
+      bitWriter.writeBits(valueBitLength, node.value)
+  bitWriter.writeBits(valueLengthFieldBitLength, valueBitLength.uint8) # header first
+  writeNode(tree)
diff --git a/src/integers.nim b/src/integers.nim
index 7b0f166..c93c9b8 100644
--- a/src/integers.nim
+++ b/src/integers.nim
@@ -22,6 +22,12 @@ proc `/^`*[T: Natural](x, y: T): T =
 proc truncateToUint8*(x: SomeUnsignedInt): uint8 =
   (x and uint8.high).uint8
 
+proc bitLength*[T: SomeUnsignedInt](x: T): int = # significant bits; 0 when x == 0
+  var buf = x
+  while buf > 0.T:
+    buf = buf shr 1
+    result += 1 # one more bit for every shift needed to reach zero
+
 proc leastSignificantBits*[T: SomeUnsignedInt](x: T, bits: int): T =
   let maskOffset = sizeof(T) * wordBitLength - bits
   if maskOffset >= 0: (x shl maskOffset) shr maskOffset else: x
diff --git a/tests/thuffmantree.nim b/tests/thuffmantree.nim
new file mode 100644
index 0000000..ec40bdb
--- /dev/null
+++ b/tests/thuffmantree.nim
@@ -0,0 +1,74 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+import unittest, streams
+import bitreader, bitwriter, huffmantree
+
+suite "huffmantree":
+  let tree = huffmanBranch( # shape: (1, (2, 3))
+    huffmanLeaf(1'u),
+    huffmanBranch(
+      huffmanLeaf(2'u),
+      huffmanLeaf(3'u)))
+
+  test "equality":
+    check huffmanLeaf(12'u) == huffmanLeaf(12'u)
+    check huffmanLeaf(12'u) != huffmanLeaf(21'u)
+    check huffmanLeaf(12'u) != huffmanBranch(huffmanLeaf(12'u), huffmanLeaf(12'u))
+    check huffmanBranch(huffmanLeaf(12'u), huffmanLeaf(21'u)) == huffmanBranch(huffmanLeaf(12'u), huffmanLeaf(21'u))
+    check huffmanBranch(huffmanLeaf(12'u), huffmanLeaf(21'u)) != huffmanBranch(huffmanLeaf(12'u), huffmanLeaf(1'u))
+    check tree == tree
+
+  test "maxValue":
+    check tree.maxValue() == 3
+
+  test "deserialise":
+    let stream = newStringStream()
+    defer: stream.close()
+    let bitWriter = stream.bitWriter() # used to hand-encode `tree` below
+    bitWriter.writeBits(valueLengthFieldBitLength, 2'u8) # header: 2 bits per value
+    bitWriter.writeBool(false) # root branch
+    bitWriter.writeBool(true) # leaf holding 1
+    bitWriter.writeBits(2, 1'u)
+    bitWriter.writeBool(false) # right branch
+    bitWriter.writeBool(true) # leaf holding 2
+    bitWriter.writeBits(2, 2'u)
+    bitWriter.writeBool(true) # leaf holding 3
+    bitWriter.writeBits(2, 3'u)
+    bitWriter.flush()
+
+    stream.setPosition(0)
+    let bitReader = stream.bitReader()
+    check huffmantree.deserialise(bitReader, uint) == tree
+
+  test "serialise":
+    let stream = newStringStream()
+    defer: stream.close()
+    let bitWriter = stream.bitWriter()
+    tree.serialise(bitWriter)
+    bitWriter.flush()
+
+    stream.setPosition(0)
+    let bitReader = stream.bitReader()
+    check bitReader.readBits(valueLengthFieldBitLength, uint8) == 2 # header
+    check bitReader.readBool() == false # root branch
+    check bitReader.readBool() == true # leaf holding 1
+    check bitReader.readBits(2, uint8) == 1
+    check bitReader.readBool() == false # right branch
+    check bitReader.readBool() == true # leaf holding 2
+    check bitReader.readBits(2, uint8) == 2
+    check bitReader.readBool() == true # leaf holding 3
+    check bitReader.readBits(2, uint8) == 3
diff --git a/tests/tintegers.nim b/tests/tintegers.nim
index 956e4aa..851e926 100644
--- a/tests/tintegers.nim
+++ b/tests/tintegers.nim
@@ -27,6 +27,10 @@ suite "integers":
     check truncateToUint8(0x00FA'u16) == 0xFA'u8
     check truncateToUint8(0xFFFA'u16) == 0xFA'u8
 
+  test "bitLength": # NOTE(review): untyped int literals vs SomeUnsignedInt - confirm
+    check bitLength(0b1_1111) == 5 # five significant bits
+    check bitLength(0b1000_0000) == 8 # highest set bit is the eighth
+
   test "leastSignificantBits":
     check leastSignificantBits(0xFF'u8, 3) == 0b0000_0111'u8
     check leastSignificantBits(0b0001_0101'u8, 3) == 0b0000_0101'u8
-- 
cgit v1.2.3