From 5cc4256a931b98ea167291397421d0db60c5d40c Mon Sep 17 00:00:00 2001 From: pacien Date: Sun, 2 Dec 2018 00:22:56 +0100 Subject: implement lzss block --- src/blocks/lzssblock.nim | 31 +++++++++++++++++++++++++------ src/huffman/huffmantree.nim | 10 +++++----- src/lzss/lzsschain.nim | 17 ++++++----------- src/lzss/lzssencoder.nim | 2 +- src/lzsshuffman/lzsshuffmandecoder.nim | 34 ++++++++++++++++++++++++++++++++++ src/lzsshuffman/lzsshuffmanencoder.nim | 34 ++++++++++++++++++++++++++++++++++ src/lzsshuffman/lzsshuffmanstats.nim | 32 ++++++++++++++++++++++++++++++++ src/lzsshuffman/lzsshuffmansymbol.nim | 34 ++++++++++++++++++++++++++++++++++ 8 files changed, 171 insertions(+), 23 deletions(-) create mode 100644 src/lzsshuffman/lzsshuffmandecoder.nim create mode 100644 src/lzsshuffman/lzsshuffmanencoder.nim create mode 100644 src/lzsshuffman/lzsshuffmanstats.nim create mode 100644 src/lzsshuffman/lzsshuffmansymbol.nim (limited to 'src') diff --git a/src/blocks/lzssblock.nim b/src/blocks/lzssblock.nim index f68f665..b23cee2 100644 --- a/src/blocks/lzssblock.nim +++ b/src/blocks/lzssblock.nim @@ -14,19 +14,38 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -import ../bitio/bitreader, ../bitio/bitwriter +import lists +import ../bitio/integers, ../bitio/bitreader, ../bitio/bitwriter +import ../lzss/lzsschain, ../lzss/lzssencoder +import ../huffman/huffmantree, ../huffman/huffmantreebuilder, ../huffman/huffmanencoder, ../huffman/huffmandecoder +import ../lzsshuffman/lzsshuffmanstats, ../lzsshuffman/lzsshuffmandecoder, ../lzsshuffman/lzsshuffmanencoder + +const maxDataByteLength = 32_000 type LzssBlock* = object - discard + lzssChain: LzssChain proc readSerialised*(bitReader: BitReader): LzssBlock = - discard + let symbolHuffmanTree = huffmantree.deserialise(bitReader, uint16) + let positionHuffmanTree = huffmantree.deserialise(bitReader, uint16) + let symbolDecoder = symbolHuffmanTree.decoder() + let positionDecoder = positionHuffmanTree.decoder() + LzssBlock(lzssChain: readChain(bitReader, symbolDecoder, positionDecoder, maxDataByteLength)) proc writeSerialisedTo*(lzssBlock: LzssBlock, bitWriter: BitWriter) = - discard + let (symbolStats, positionStats) = aggregateStats(lzssBlock.lzssChain) + let symbolHuffmanTree = buildHuffmanTree(symbolStats) + let positionHuffmanTree = buildHuffmanTree(positionStats) + let symbolEncoder = symbolHuffmanTree.encoder(uint16) + let positionEncoder = positionHuffmanTree.encoder(uint16) + symbolHuffmanTree.serialise(bitWriter) + positionHuffmanTree.serialise(bitWriter) + lzssBlock.lzssChain.writeChain(symbolEncoder, positionEncoder, bitWriter) proc readRaw*(bitReader: BitReader): LzssBlock = - discard + let byteBuf = bitReader.readSeq(maxDataByteLength, uint8) + LzssBlock(lzssChain: lzssEncode(byteBuf.data)) proc writeRawTo*(lzssBlock: LzssBlock, bitWriter: BitWriter) = - discard + let byteSeq = lzssBlock.lzssChain.decode() + bitWriter.writeSeq(byteSeq.len * wordBitLength, byteSeq) diff --git a/src/huffman/huffmantree.nim b/src/huffman/huffmantree.nim index 58a840e..f3fce1b 100644 --- a/src/huffman/huffmantree.nim +++ b/src/huffman/huffmantree.nim @@ -31,6 +31,11 @@ type HuffmanTreeNode*[T: SomeUnsignedInt] = ref object of leaf: value*: T +proc maxValue*[T](node: HuffmanTreeNode[T]): T = + case node.kind: + of branch: node.maxChildValue + of leaf: node.value + proc huffmanBranch*[T](left, right: HuffmanTreeNode[T]): HuffmanTreeNode[T] = HuffmanTreeNode[T]( kind: branch, left: left, right: right, @@ -45,11 +50,6 @@ proc `==`*[T](a, b: HuffmanTreeNode[T]): bool = of branch: a.left == b.left and a.right == b.right of leaf: a.value == b.value -proc maxValue*[T](node: HuffmanTreeNode[T]): T = - case node.kind: - of branch: node.maxChildValue - of leaf: node.value - proc deserialise*[T](bitReader: BitReader, valueType: typedesc[T]): HuffmanTreeNode[T] = let valueBitLength = bitReader.readBits(valueLengthFieldBitLength, uint8).int proc readNode(): HuffmanTreeNode[T] = diff --git a/src/lzss/lzsschain.nim b/src/lzss/lzsschain.nim index 8b49914..8ebcb1a 100644 --- a/src/lzss/lzsschain.nim +++ b/src/lzss/lzsschain.nim @@ -15,7 +15,7 @@ # along with this program. If not, see . import lists, tables, sugar -import ../bitio/integers, ../huffman/huffmantree +import ../bitio/integers import listpolyfill, lzssnode const maxChainByteLength = 32_000 * wordBitLength @@ -26,6 +26,11 @@ type LzssChain* = proc lzssChain*(): LzssChain = initSinglyLinkedList[LzssNode]() +proc lzssChain*(chainArray: openArray[LzssNode]): LzssChain = + var chain = lzssChain() + for node in chainArray: chain.append(node) + chain + proc decode*(lzssChain: LzssChain): seq[uint8] = result = newSeqOfCap[uint8](maxChainByteLength) for node in lzssChain.items: @@ -35,13 +40,3 @@ proc decode*(lzssChain: LzssChain): seq[uint8] = of reference: let absolutePos = result.len - node.relativePos result.add(result.toOpenArray(absolutePos, absolutePos + node.length - 1)) - -proc stats*(lzssChain: LzssChain): tuple[characters: CountTableRef[uint8], lengths, positions: CountTableRef[int]] = - result = (newCountTable[uint8](), newCountTable[int](), newCountTable[int]()) - for node in lzssChain.items: - case node.kind: - of character: - result.characters.inc(node.character) - of reference: - result.lengths.inc(node.length) - result.positions.inc(node.relativePos) diff --git a/src/lzss/lzssencoder.nim b/src/lzss/lzssencoder.nim index 8b750fb..82fbe7b 100644 --- a/src/lzss/lzssencoder.nim +++ b/src/lzss/lzssencoder.nim @@ -17,7 +17,7 @@ import lists import listpolyfill, matchtable, lzssnode, lzsschain -const matchGroupLength = 3 +const matchGroupLength* = 3 const maxRefByteLength = high(uint8).int + matchGroupLength let emptySinglyLinkedList = initSinglyLinkedList[int]() diff --git a/src/lzsshuffman/lzsshuffmandecoder.nim b/src/lzsshuffman/lzsshuffmandecoder.nim new file mode 100644 index 0000000..cd71914 --- /dev/null +++ b/src/lzsshuffman/lzsshuffmandecoder.nim @@ -0,0 +1,34 @@ +# gzip-like LZSS compressor +# Copyright (C) 2018 Pacien TRAN-GIRARD +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import lists +import ../bitio/bitreader +import ../lzss/listpolyfill, ../lzss/lzssnode, ../lzss/lzsschain +import ../huffman/huffmantree, ../huffman/huffmandecoder +import lzsshuffmansymbol + +proc readChain*(bitReader: BitReader, symbolDecoder, positionDecoder: HuffmanDecoder[uint16], maxDataByteLength: int): LzssChain = + var chain = lzssChain() + var (symbol, byteCursor) = (symbolDecoder.decode(bitReader).Symbol, 0) + while not symbol.isEndMarker(): + if byteCursor > maxDataByteLength: raise newException(IOError, "lzss block too long") + if symbol.isCharacter(): + chain.append(lzssCharacter(symbol.uint8)) + else: + let position = positionDecoder.decode(bitReader) + chain.append(unpackLzssReference(symbol, position)) + (symbol, byteCursor) = (symbolDecoder.decode(bitReader).Symbol, byteCursor + 1) + chain diff --git a/src/lzsshuffman/lzsshuffmanencoder.nim b/src/lzsshuffman/lzsshuffmanencoder.nim new file mode 100644 index 0000000..ea89f85 --- /dev/null +++ b/src/lzsshuffman/lzsshuffmanencoder.nim @@ -0,0 +1,34 @@ +# gzip-like LZSS compressor +# Copyright (C) 2018 Pacien TRAN-GIRARD +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import lists +import ../bitio/bitwriter +import ../lzss/listpolyfill, ../lzss/lzssnode, ../lzss/lzsschain, ../lzss/lzssencoder +import ../huffman/huffmantree, ../huffman/huffmantreebuilder, ../huffman/huffmanencoder +import lzsshuffmansymbol + +proc writeSymbol(bitWriter: BitWriter, encodedSymbol: tuple[bitLength: int, value: uint16]) = + bitWriter.writeBits(encodedSymbol.bitLength, encodedSymbol.value) + +proc writeChain*(lzssChain: LzssChain, symbolEncoder, positionEncoder: HuffmanEncoder[uint16, uint16], bitWriter: BitWriter) = + for node in lzssChain.items: + case node.kind: + of character: + bitWriter.writeSymbol(symbolEncoder.encode(node.character)) + of reference: + bitWriter.writeSymbol(symbolEncoder.encode(shiftLzssLength(node.length))) + bitWriter.writeSymbol(positionEncoder.encode(node.relativePos.uint16)) + bitWriter.writeSymbol(symbolEncoder.encode(endSymbol)) diff --git a/src/lzsshuffman/lzsshuffmanstats.nim b/src/lzsshuffman/lzsshuffmanstats.nim new file mode 100644 index 0000000..037ce5f --- /dev/null +++ b/src/lzsshuffman/lzsshuffmanstats.nim @@ -0,0 +1,32 @@ +# gzip-like LZSS compressor +# Copyright (C) 2018 Pacien TRAN-GIRARD +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import tables, lists +import ../lzss/lzssnode, ../lzss/lzsschain +import lzsshuffmansymbol + +proc aggregateStats*(chain: LzssChain): tuple[symbolTable, positionTable: CountTableRef[uint16]] = + var (symbolTable, positionTable) = (newCountTable[uint16](), newCountTable[uint16]()) + for node in chain.items: + case node.kind: + of character: + symbolTable.inc(node.character) + of reference: + symbolTable.inc(shiftLzssLength(node.length)) + positionTable.inc(node.relativePos.uint16) + symbolTable.inc(endSymbol) + if positionTable.len < 1: positionTable.inc(0) # ensure non-empty tree + (symbolTable, positionTable) diff --git a/src/lzsshuffman/lzsshuffmansymbol.nim b/src/lzsshuffman/lzsshuffmansymbol.nim new file mode 100644 index 0000000..cfd3fe4 --- /dev/null +++ b/src/lzsshuffman/lzsshuffmansymbol.nim @@ -0,0 +1,34 @@ +# gzip-like LZSS compressor +# Copyright (C) 2018 Pacien TRAN-GIRARD +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import lists +import ../lzss/lzssnode, ../lzss/lzssencoder + +type Symbol* = uint16 + +const endSymbol* = 256.Symbol + +proc isEndMarker*(symbol: Symbol): bool = + symbol == endSymbol + +proc isCharacter*(symbol: Symbol): bool = + symbol < endSymbol + +proc unpackLzssReference*(symbol: Symbol, position: uint16): LzssNode = + lzssReference(symbol.int - endSymbol.int - 1 + matchGroupLength, position.int) + +proc shiftLzssLength*(length: int): uint16 = + (length + endSymbol.int + 1 - matchGroupLength).uint16 -- cgit v1.2.3