From 5cc4256a931b98ea167291397421d0db60c5d40c Mon Sep 17 00:00:00 2001
From: pacien
Date: Sun, 2 Dec 2018 00:22:56 +0100
Subject: implement lzss block
---
src/blocks/lzssblock.nim | 31 +++++++++++++++++++++++++------
src/huffman/huffmantree.nim | 10 +++++-----
src/lzss/lzsschain.nim | 17 ++++++-----------
src/lzss/lzssencoder.nim | 2 +-
src/lzsshuffman/lzsshuffmandecoder.nim | 34 ++++++++++++++++++++++++++++++++++
src/lzsshuffman/lzsshuffmanencoder.nim | 34 ++++++++++++++++++++++++++++++++++
src/lzsshuffman/lzsshuffmanstats.nim | 32 ++++++++++++++++++++++++++++++++
src/lzsshuffman/lzsshuffmansymbol.nim | 34 ++++++++++++++++++++++++++++++++++
8 files changed, 171 insertions(+), 23 deletions(-)
create mode 100644 src/lzsshuffman/lzsshuffmandecoder.nim
create mode 100644 src/lzsshuffman/lzsshuffmanencoder.nim
create mode 100644 src/lzsshuffman/lzsshuffmanstats.nim
create mode 100644 src/lzsshuffman/lzsshuffmansymbol.nim
(limited to 'src')
diff --git a/src/blocks/lzssblock.nim b/src/blocks/lzssblock.nim
index f68f665..b23cee2 100644
--- a/src/blocks/lzssblock.nim
+++ b/src/blocks/lzssblock.nim
@@ -14,19 +14,38 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see .
-import ../bitio/bitreader, ../bitio/bitwriter
+import lists
+import ../bitio/integers, ../bitio/bitreader, ../bitio/bitwriter
+import ../lzss/lzsschain, ../lzss/lzssencoder
+import ../huffman/huffmantree, ../huffman/huffmantreebuilder, ../huffman/huffmanencoder, ../huffman/huffmandecoder
+import ../lzsshuffman/lzsshuffmanstats, ../lzsshuffman/lzsshuffmandecoder, ../lzsshuffman/lzsshuffmanencoder
+
+const maxDataByteLength = 32_000
type LzssBlock* = object
- discard
+ lzssChain: LzssChain
proc readSerialised*(bitReader: BitReader): LzssBlock =
- discard
+ let symbolHuffmanTree = huffmantree.deserialise(bitReader, uint16)
+ let positionHuffmanTree = huffmantree.deserialise(bitReader, uint16)
+ let symbolDecoder = symbolHuffmanTree.decoder()
+ let positionDecoder = positionHuffmanTree.decoder()
+ LzssBlock(lzssChain: readChain(bitReader, symbolDecoder, positionDecoder, maxDataByteLength))
proc writeSerialisedTo*(lzssBlock: LzssBlock, bitWriter: BitWriter) =
- discard
+ let (symbolStats, positionStats) = aggregateStats(lzssBlock.lzssChain)
+ let symbolHuffmanTree = buildHuffmanTree(symbolStats)
+ let positionHuffmanTree = buildHuffmanTree(positionStats)
+ let symbolEncoder = symbolHuffmanTree.encoder(uint16)
+ let positionEncoder = positionHuffmanTree.encoder(uint16)
+ symbolHuffmanTree.serialise(bitWriter)
+ positionHuffmanTree.serialise(bitWriter)
+ lzssBlock.lzssChain.writeChain(symbolEncoder, positionEncoder, bitWriter)
proc readRaw*(bitReader: BitReader): LzssBlock =
- discard
+ let byteBuf = bitReader.readSeq(maxDataByteLength, uint8)
+ LzssBlock(lzssChain: lzssEncode(byteBuf.data))
proc writeRawTo*(lzssBlock: LzssBlock, bitWriter: BitWriter) =
- discard
+ let byteSeq = lzssBlock.lzssChain.decode()
+ bitWriter.writeSeq(byteSeq.len * wordBitLength, byteSeq)
diff --git a/src/huffman/huffmantree.nim b/src/huffman/huffmantree.nim
index 58a840e..f3fce1b 100644
--- a/src/huffman/huffmantree.nim
+++ b/src/huffman/huffmantree.nim
@@ -31,6 +31,11 @@ type HuffmanTreeNode*[T: SomeUnsignedInt] = ref object
of leaf:
value*: T
+proc maxValue*[T](node: HuffmanTreeNode[T]): T =
+ case node.kind:
+ of branch: node.maxChildValue
+ of leaf: node.value
+
proc huffmanBranch*[T](left, right: HuffmanTreeNode[T]): HuffmanTreeNode[T] =
HuffmanTreeNode[T](
kind: branch, left: left, right: right,
@@ -45,11 +50,6 @@ proc `==`*[T](a, b: HuffmanTreeNode[T]): bool =
of branch: a.left == b.left and a.right == b.right
of leaf: a.value == b.value
-proc maxValue*[T](node: HuffmanTreeNode[T]): T =
- case node.kind:
- of branch: node.maxChildValue
- of leaf: node.value
-
proc deserialise*[T](bitReader: BitReader, valueType: typedesc[T]): HuffmanTreeNode[T] =
let valueBitLength = bitReader.readBits(valueLengthFieldBitLength, uint8).int
proc readNode(): HuffmanTreeNode[T] =
diff --git a/src/lzss/lzsschain.nim b/src/lzss/lzsschain.nim
index 8b49914..8ebcb1a 100644
--- a/src/lzss/lzsschain.nim
+++ b/src/lzss/lzsschain.nim
@@ -15,7 +15,7 @@
# along with this program. If not, see .
import lists, tables, sugar
-import ../bitio/integers, ../huffman/huffmantree
+import ../bitio/integers
import listpolyfill, lzssnode
const maxChainByteLength = 32_000 * wordBitLength
@@ -26,6 +26,11 @@ type LzssChain* =
proc lzssChain*(): LzssChain =
initSinglyLinkedList[LzssNode]()
+proc lzssChain*(chainArray: openArray[LzssNode]): LzssChain =
+ var chain = lzssChain()
+ for node in chainArray: chain.append(node)
+ chain
+
proc decode*(lzssChain: LzssChain): seq[uint8] =
result = newSeqOfCap[uint8](maxChainByteLength)
for node in lzssChain.items:
@@ -35,13 +40,3 @@ proc decode*(lzssChain: LzssChain): seq[uint8] =
of reference:
let absolutePos = result.len - node.relativePos
result.add(result.toOpenArray(absolutePos, absolutePos + node.length - 1))
-
-proc stats*(lzssChain: LzssChain): tuple[characters: CountTableRef[uint8], lengths, positions: CountTableRef[int]] =
- result = (newCountTable[uint8](), newCountTable[int](), newCountTable[int]())
- for node in lzssChain.items:
- case node.kind:
- of character:
- result.characters.inc(node.character)
- of reference:
- result.lengths.inc(node.length)
- result.positions.inc(node.relativePos)
diff --git a/src/lzss/lzssencoder.nim b/src/lzss/lzssencoder.nim
index 8b750fb..82fbe7b 100644
--- a/src/lzss/lzssencoder.nim
+++ b/src/lzss/lzssencoder.nim
@@ -17,7 +17,7 @@
import lists
import listpolyfill, matchtable, lzssnode, lzsschain
-const matchGroupLength = 3
+const matchGroupLength* = 3
const maxRefByteLength = high(uint8).int + matchGroupLength
let emptySinglyLinkedList = initSinglyLinkedList[int]()
diff --git a/src/lzsshuffman/lzsshuffmandecoder.nim b/src/lzsshuffman/lzsshuffmandecoder.nim
new file mode 100644
index 0000000..cd71914
--- /dev/null
+++ b/src/lzsshuffman/lzsshuffmandecoder.nim
@@ -0,0 +1,34 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see .
+
+import lists
+import ../bitio/bitreader
+import ../lzss/listpolyfill, ../lzss/lzssnode, ../lzss/lzsschain
+import ../huffman/huffmantree, ../huffman/huffmandecoder
+import lzsshuffmansymbol
+
+proc readChain*(bitReader: BitReader, symbolDecoder, positionDecoder: HuffmanDecoder[uint16], maxDataByteLength: int): LzssChain =
+ var chain = lzssChain()
+ var (symbol, byteCursor) = (symbolDecoder.decode(bitReader).Symbol, 0)
+ while not symbol.isEndMarker():
+ if byteCursor > maxDataByteLength: raise newException(IOError, "lzss block too long")
+ if symbol.isCharacter():
+ chain.append(lzssCharacter(symbol.uint8))
+ else:
+ let position = positionDecoder.decode(bitReader)
+ chain.append(unpackLzssReference(symbol, position))
+ (symbol, byteCursor) = (symbolDecoder.decode(bitReader).Symbol, byteCursor + 1)
+ chain
diff --git a/src/lzsshuffman/lzsshuffmanencoder.nim b/src/lzsshuffman/lzsshuffmanencoder.nim
new file mode 100644
index 0000000..ea89f85
--- /dev/null
+++ b/src/lzsshuffman/lzsshuffmanencoder.nim
@@ -0,0 +1,34 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see .
+
+import lists
+import ../bitio/bitwriter
+import ../lzss/listpolyfill, ../lzss/lzssnode, ../lzss/lzsschain, ../lzss/lzssencoder
+import ../huffman/huffmantree, ../huffman/huffmantreebuilder, ../huffman/huffmanencoder
+import lzsshuffmansymbol
+
+proc writeSymbol(bitWriter: BitWriter, encodedSymbol: tuple[bitLength: int, value: uint16]) =
+ bitWriter.writeBits(encodedSymbol.bitLength, encodedSymbol.value)
+
+proc writeChain*(lzssChain: LzssChain, symbolEncoder, positionEncoder: HuffmanEncoder[uint16, uint16], bitWriter: BitWriter) =
+ for node in lzssChain.items:
+ case node.kind:
+ of character:
+ bitWriter.writeSymbol(symbolEncoder.encode(node.character))
+ of reference:
+ bitWriter.writeSymbol(symbolEncoder.encode(shiftLzssLength(node.length)))
+ bitWriter.writeSymbol(positionEncoder.encode(node.relativePos.uint16))
+ bitWriter.writeSymbol(symbolEncoder.encode(endSymbol))
diff --git a/src/lzsshuffman/lzsshuffmanstats.nim b/src/lzsshuffman/lzsshuffmanstats.nim
new file mode 100644
index 0000000..037ce5f
--- /dev/null
+++ b/src/lzsshuffman/lzsshuffmanstats.nim
@@ -0,0 +1,32 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see .
+
+import tables, lists
+import ../lzss/lzssnode, ../lzss/lzsschain
+import lzsshuffmansymbol
+
+proc aggregateStats*(chain: LzssChain): tuple[symbolTable, positionTable: CountTableRef[uint16]] =
+ var (symbolTable, positionTable) = (newCountTable[uint16](), newCountTable[uint16]())
+ for node in chain.items:
+ case node.kind:
+ of character:
+ symbolTable.inc(node.character)
+ of reference:
+ symbolTable.inc(shiftLzssLength(node.length))
+ positionTable.inc(node.relativePos.uint16)
+ symbolTable.inc(endSymbol)
+ if positionTable.len < 1: positionTable.inc(0) # ensure non-empty tree
+ (symbolTable, positionTable)
diff --git a/src/lzsshuffman/lzsshuffmansymbol.nim b/src/lzsshuffman/lzsshuffmansymbol.nim
new file mode 100644
index 0000000..cfd3fe4
--- /dev/null
+++ b/src/lzsshuffman/lzsshuffmansymbol.nim
@@ -0,0 +1,34 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see .
+
+import lists
+import ../lzss/lzssnode, ../lzss/lzssencoder
+
+type Symbol* = uint16
+
+const endSymbol* = 256.Symbol
+
+proc isEndMarker*(symbol: Symbol): bool =
+ symbol == endSymbol
+
+proc isCharacter*(symbol: Symbol): bool =
+ symbol < endSymbol
+
+proc unpackLzssReference*(symbol: Symbol, position: uint16): LzssNode =
+ lzssReference(symbol.int - endSymbol.int - 1 + matchGroupLength, position.int)
+
+proc shiftLzssLength*(length: int): uint16 =
+ (length + endSymbol.int + 1 - matchGroupLength).uint16
--
cgit v1.2.3