From 5bbe75659aef55542268cbf35c66342cb22ce865 Mon Sep 17 00:00:00 2001 From: pacien Date: Fri, 30 Nov 2018 18:44:20 +0100 Subject: isolate lzss chain module --- src/lzss/listpolyfill.nim | 42 ++++++++++++++++++++++++++++++++++ src/lzss/lzsschain.nim | 47 ++++++++++++++++++++++++++++++++++++++ src/lzss/lzssencoder.nim | 58 +++++++++++++++++++++++++++++++++++++++++++++++ src/lzss/lzssnode.nim | 39 +++++++++++++++++++++++++++++++ src/lzss/matchtable.nim | 32 ++++++++++++++++++++++++++ src/lzsschain.nim | 46 ------------------------------------- src/lzssencoder.nim | 58 ----------------------------------------------- src/lzssnode.nim | 39 ------------------------------- src/matchtable.nim | 32 -------------------------- src/polyfill.nim | 42 ---------------------------------- 10 files changed, 218 insertions(+), 217 deletions(-) create mode 100644 src/lzss/listpolyfill.nim create mode 100644 src/lzss/lzsschain.nim create mode 100644 src/lzss/lzssencoder.nim create mode 100644 src/lzss/lzssnode.nim create mode 100644 src/lzss/matchtable.nim delete mode 100644 src/lzsschain.nim delete mode 100644 src/lzssencoder.nim delete mode 100644 src/lzssnode.nim delete mode 100644 src/matchtable.nim delete mode 100644 src/polyfill.nim (limited to 'src') diff --git a/src/lzss/listpolyfill.nim b/src/lzss/listpolyfill.nim new file mode 100644 index 0000000..00b30ee --- /dev/null +++ b/src/lzss/listpolyfill.nim @@ -0,0 +1,42 @@ +# gzip-like LZSS compressor +# Copyright (C) 2018 Pacien TRAN-GIRARD +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import lists + +# https://github.com/nim-lang/Nim/pull/9805 + +proc prepend*[T](L: var SinglyLinkedList[T], n: SinglyLinkedNode[T]) = + ## prepends a node to `L`. Efficiency: O(1). + n.next = L.head + L.head = n + if L.tail == nil: L.tail = n + +proc prepend*[T](L: var SinglyLinkedList[T], value: T) = + ## prepends a node to `L`. Efficiency: O(1). + listpolyfill.prepend(L, newSinglyLinkedNode(value)) + +proc append*[T](L: var SinglyLinkedList[T], n: SinglyLinkedNode[T]) = + ## appends a node `n` to `L`. Efficiency: O(1). + n.next = nil + if L.tail != nil: + assert(L.tail.next == nil) + L.tail.next = n + L.tail = n + if L.head == nil: L.head = n + +proc append*[T](L: var SinglyLinkedList[T], value: T) = + ## appends a value to `L`. Efficiency: O(1). + append(L, newSinglyLinkedNode(value)) diff --git a/src/lzss/lzsschain.nim b/src/lzss/lzsschain.nim new file mode 100644 index 0000000..2ecff9e --- /dev/null +++ b/src/lzss/lzsschain.nim @@ -0,0 +1,47 @@ +# gzip-like LZSS compressor +# Copyright (C) 2018 Pacien TRAN-GIRARD +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import lists, tables, sugar +import ../integers, ../huffman/huffmantree +import listpolyfill, lzssnode + +const maxChainByteLength = 32_000 * wordBitLength + +type LzssChain* = + SinglyLinkedList[LzssNode] + +proc lzssChain*(): LzssChain = + initSinglyLinkedList[LzssNode]() + +proc decode*(lzssChain: LzssChain): seq[uint8] = + result = newSeqOfCap[uint8](maxChainByteLength) + for node in lzssChain.items: + case node.kind: + of character: + result.add(node.character) + of reference: + let absolutePos = result.len - node.relativePos + result.add(result.toOpenArray(absolutePos, absolutePos + node.length - 1)) + +proc stats*(lzssChain: LzssChain): tuple[characters: CountTableRef[uint8], lengths, positions: CountTableRef[int]] = + result = (newCountTable[uint8](), newCountTable[int](), newCountTable[int]()) + for node in lzssChain.items: + case node.kind: + of character: + result.characters.inc(node.character) + of reference: + result.lengths.inc(node.length) + result.positions.inc(node.relativePos) diff --git a/src/lzss/lzssencoder.nim b/src/lzss/lzssencoder.nim new file mode 100644 index 0000000..8b750fb --- /dev/null +++ b/src/lzss/lzssencoder.nim @@ -0,0 +1,58 @@ +# gzip-like LZSS compressor +# Copyright (C) 2018 Pacien TRAN-GIRARD +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import lists +import listpolyfill, matchtable, lzssnode, lzsschain + +const matchGroupLength = 3 +const maxRefByteLength = high(uint8).int + matchGroupLength +let emptySinglyLinkedList = initSinglyLinkedList[int]() + +proc commonPrefixLength*(a, b: openArray[uint8], skipFirst, maxLength: int): int = + result = skipFirst + let maxPrefixLength = min(min(a.len, b.len), maxLength) + while result < maxPrefixLength and a[result] == b[result]: result += 1 + +proc longestPrefix*(candidatePos: SinglyLinkedList[int], searchBuf, lookAheadBuf: openArray[uint8]): tuple[length, pos: int] = + for startIndex in candidatePos.items: + let prefixLength = commonPrefixLength( + searchBuf.toOpenArray(startIndex, searchBuf.len - 1), lookAheadBuf, matchGroupLength, maxRefByteLength) + if prefixLength > result.length: result = (prefixLength, startIndex) + if prefixLength >= maxRefByteLength: return + +proc addGroups*(matchTable: MatchTable[seq[uint8], int], buffer: openArray[uint8], fromPosIncl, toPosExcl: int) = + for cursor in fromPosIncl..(toPosExcl - matchGroupLength): + let group = buffer[cursor..<(cursor + matchGroupLength)] + matchTable.addMatch(group, cursor) + +proc lzssEncode*(buf: openArray[uint8]): LzssChain = + result = initSinglyLinkedList[LzssNode]() + let matchTable = initMatchTable(seq[uint8], int) + var cursor = 0 + while cursor < buf.len() - matchGroupLength: + let matches = matchTable.matchList(buf[cursor..<(cursor + matchGroupLength)]) + let prefix = matches.longestPrefix(buf.toOpenArray(0, cursor - 1), buf.toOpenArray(cursor, buf.len - 1)) + if prefix.length > 0: + result.append(lzssReference(prefix.length, cursor - prefix.pos)) + cursor += prefix.length + else: + result.append(lzssCharacter(buf[cursor])) + cursor += 1 + if cursor - prefix.length >= matchGroupLength: + matchTable.addGroups(buf, cursor - prefix.length - matchGroupLength, cursor) + while cursor < buf.len: + result.append(lzssCharacter(buf[cursor])) + cursor += 1 diff --git a/src/lzss/lzssnode.nim b/src/lzss/lzssnode.nim new file mode 100644 index 0000000..de5958d --- /dev/null +++ b/src/lzss/lzssnode.nim @@ -0,0 +1,39 @@ +# gzip-like LZSS compressor +# Copyright (C) 2018 Pacien TRAN-GIRARD +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +type LzssNodeKind* = enum + character, + reference + +type LzssNode* = object + case kind*: LzssNodeKind + of character: + character*: uint8 + of reference: + length*: int + relativePos*: int + +proc lzssCharacter*(value: uint8): LzssNode = + LzssNode(kind: character, character: value) + +proc lzssReference*(length, relativePos: int): LzssNode = + LzssNode(kind: reference, length: length, relativePos: relativePos) + +proc `==`*(a, b: LzssNode): bool = + if a.kind != b.kind: return false + case a.kind: + of character: a.character == b.character + of reference: a.length == b.length and a.relativePos == b.relativePos diff --git a/src/lzss/matchtable.nim b/src/lzss/matchtable.nim new file mode 100644 index 0000000..b17ce68 --- /dev/null +++ b/src/lzss/matchtable.nim @@ -0,0 +1,32 @@ +# gzip-like LZSS compressor +# Copyright (C) 2018 Pacien TRAN-GIRARD +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import tables, lists +import listpolyfill + +type MatchTable*[K, V] = + TableRef[K, SinglyLinkedList[V]] + +proc initMatchTable*[K, V](keyType: typedesc[K], valueType: typedesc[V]): MatchTable[K, V] = + newTable[K, SinglyLinkedList[V]]() + +proc matchList*[K, V](matchTable: MatchTable[K, V], pattern: K): SinglyLinkedList[V] = + matchTable.getOrDefault(pattern, initSinglyLinkedList[V]()) + +proc addMatch*[K, V](matchTable: MatchTable[K, V], pattern: K, value: V) = + var matchList = matchTable.matchList(pattern) + listpolyfill.prepend(matchList, value) + matchTable[pattern] = matchList diff --git a/src/lzsschain.nim b/src/lzsschain.nim deleted file mode 100644 index 44200f2..0000000 --- a/src/lzsschain.nim +++ /dev/null @@ -1,46 +0,0 @@ -# gzip-like LZSS compressor -# Copyright (C) 2018 Pacien TRAN-GIRARD -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -import lists, tables, sugar -import polyfill, integers, lzssnode, huffman/huffmantree - -const maxChainByteLength = 32_000 * wordBitLength - -type LzssChain* = - SinglyLinkedList[LzssNode] - -proc lzssChain*(): LzssChain = - initSinglyLinkedList[LzssNode]() - -proc decode*(lzssChain: LzssChain): seq[uint8] = - result = newSeqOfCap[uint8](maxChainByteLength) - for node in lzssChain.items: - case node.kind: - of character: - result.add(node.character) - of reference: - let absolutePos = result.len - node.relativePos - result.add(result.toOpenArray(absolutePos, absolutePos + node.length - 1)) - -proc stats*(lzssChain: LzssChain): tuple[characters: CountTableRef[uint8], lengths, positions: CountTableRef[int]] = - result = (newCountTable[uint8](), newCountTable[int](), newCountTable[int]()) - for node in lzssChain.items: - case node.kind: - of character: - result.characters.inc(node.character) - of reference: - result.lengths.inc(node.length) - result.positions.inc(node.relativePos) diff --git a/src/lzssencoder.nim b/src/lzssencoder.nim deleted file mode 100644 index 05f3a16..0000000 --- a/src/lzssencoder.nim +++ /dev/null @@ -1,58 +0,0 @@ -# gzip-like LZSS compressor -# Copyright (C) 2018 Pacien TRAN-GIRARD -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -import lists -import polyfill, matchtable, lzssnode, lzsschain - -const matchGroupLength = 3 -const maxRefByteLength = high(uint8).int + matchGroupLength -let emptySinglyLinkedList = initSinglyLinkedList[int]() - -proc commonPrefixLength*(a, b: openArray[uint8], skipFirst, maxLength: int): int = - result = skipFirst - let maxPrefixLength = min(min(a.len, b.len), maxLength) - while result < maxPrefixLength and a[result] == b[result]: result += 1 - -proc longestPrefix*(candidatePos: SinglyLinkedList[int], searchBuf, lookAheadBuf: openArray[uint8]): tuple[length, pos: int] = - for startIndex in candidatePos.items: - let prefixLength = commonPrefixLength( - searchBuf.toOpenArray(startIndex, searchBuf.len - 1), lookAheadBuf, matchGroupLength, maxRefByteLength) - if prefixLength > result.length: result = (prefixLength, startIndex) - if prefixLength >= maxRefByteLength: return - -proc addGroups*(matchTable: MatchTable[seq[uint8], int], buffer: openArray[uint8], fromPosIncl, toPosExcl: int) = - for cursor in fromPosIncl..(toPosExcl - matchGroupLength): - let group = buffer[cursor..<(cursor + matchGroupLength)] - matchTable.addMatch(group, cursor) - -proc lzssEncode*(buf: openArray[uint8]): LzssChain = - result = initSinglyLinkedList[LzssNode]() - let matchTable = initMatchTable(seq[uint8], int) - var cursor = 0 - while cursor < buf.len() - matchGroupLength: - let matches = matchTable.matchList(buf[cursor..<(cursor + matchGroupLength)]) - let prefix = matches.longestPrefix(buf.toOpenArray(0, cursor - 1), buf.toOpenArray(cursor, buf.len - 1)) - if prefix.length > 0: - result.append(lzssReference(prefix.length, cursor - prefix.pos)) - cursor += prefix.length - else: - result.append(lzssCharacter(buf[cursor])) - cursor += 1 - if cursor - prefix.length >= matchGroupLength: - matchTable.addGroups(buf, cursor - prefix.length - matchGroupLength, cursor) - while cursor < buf.len: - result.append(lzssCharacter(buf[cursor])) - cursor += 1 diff --git a/src/lzssnode.nim b/src/lzssnode.nim deleted file mode 100644 index de5958d..0000000 --- a/src/lzssnode.nim +++ /dev/null @@ -1,39 +0,0 @@ -# gzip-like LZSS compressor -# Copyright (C) 2018 Pacien TRAN-GIRARD -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -type LzssNodeKind* = enum - character, - reference - -type LzssNode* = object - case kind*: LzssNodeKind - of character: - character*: uint8 - of reference: - length*: int - relativePos*: int - -proc lzssCharacter*(value: uint8): LzssNode = - LzssNode(kind: character, character: value) - -proc lzssReference*(length, relativePos: int): LzssNode = - LzssNode(kind: reference, length: length, relativePos: relativePos) - -proc `==`*(a, b: LzssNode): bool = - if a.kind != b.kind: return false - case a.kind: - of character: a.character == b.character - of reference: a.length == b.length and a.relativePos == b.relativePos diff --git a/src/matchtable.nim b/src/matchtable.nim deleted file mode 100644 index 5be652c..0000000 --- a/src/matchtable.nim +++ /dev/null @@ -1,32 +0,0 @@ -# gzip-like LZSS compressor -# Copyright (C) 2018 Pacien TRAN-GIRARD -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -import tables, lists -import polyfill - -type MatchTable*[K, V] = - TableRef[K, SinglyLinkedList[V]] - -proc initMatchTable*[K, V](keyType: typedesc[K], valueType: typedesc[V]): MatchTable[K, V] = - newTable[K, SinglyLinkedList[V]]() - -proc matchList*[K, V](matchTable: MatchTable[K, V], pattern: K): SinglyLinkedList[V] = - matchTable.getOrDefault(pattern, initSinglyLinkedList[V]()) - -proc addMatch*[K, V](matchTable: MatchTable[K, V], pattern: K, value: V) = - var matchList = matchTable.matchList(pattern) - polyfill.prepend(matchList, value) - matchTable[pattern] = matchList diff --git a/src/polyfill.nim b/src/polyfill.nim deleted file mode 100644 index b252953..0000000 --- a/src/polyfill.nim +++ /dev/null @@ -1,42 +0,0 @@ -# gzip-like LZSS compressor -# Copyright (C) 2018 Pacien TRAN-GIRARD -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -import lists - -# https://github.com/nim-lang/Nim/pull/9805 - -proc prepend*[T](L: var SinglyLinkedList[T], n: SinglyLinkedNode[T]) = - ## prepends a node to `L`. Efficiency: O(1). - n.next = L.head - L.head = n - if L.tail == nil: L.tail = n - -proc prepend*[T](L: var SinglyLinkedList[T], value: T) = - ## prepends a node to `L`. Efficiency: O(1). - polyfill.prepend(L, newSinglyLinkedNode(value)) - -proc append*[T](L: var SinglyLinkedList[T], n: SinglyLinkedNode[T]) = - ## appends a node `n` to `L`. Efficiency: O(1). - n.next = nil - if L.tail != nil: - assert(L.tail.next == nil) - L.tail.next = n - L.tail = n - if L.head == nil: L.head = n - -proc append*[T](L: var SinglyLinkedList[T], value: T) = - ## appends a value to `L`. Efficiency: O(1). - append(L, newSinglyLinkedNode(value)) -- cgit v1.2.3