Imports #
"bufio"
"io"
"io"
"cmp"
"slices"
"bufio"
"io"
"io"
"cmp"
"slices"
const bzip2BlockMagic = 0x314159265359
const bzip2FileMagic = 0x425a
const bzip2FinalMagic = 0x177245385090
var crctab [256]uint32
invalidNodeValue is an invalid index which marks a leaf node in the tree.
const invalidNodeValue = 0xffff
A StructuralError is returned when the bzip2 data is found to be syntactically invalid.
type StructuralError string
moveToFrontDecoder implements a move-to-front list. Such a list is an efficient way to transform a string with repeating elements into one with many small valued numbers, which is suitable for entropy encoding. It works by starting with an initial list of symbols and references symbols by their index into that list. When a symbol is referenced, it's moved to the front of the list. Thus, a repeated symbol ends up being encoded with many zeros, as the symbol will be at the front of the list after the first access.
type moveToFrontDecoder []byte
bitReader wraps an io.Reader and provides the ability to read values, bit-by-bit, from it. Its Read* methods don't return the usual error because the error handling was verbose. Instead, any error is kept and can be checked afterwards.
type bitReader struct {
r io.ByteReader
n uint64
bits uint
err error
}
huffmanCode contains a symbol, its code and code length.
type huffmanCode struct {
code uint32
codeLen uint8
value uint16
}
A huffmanNode is a node in the tree. left and right contain indexes into the nodes slice of the tree. If left or right is invalidNodeValue then the child is a left node and its value is in leftValue/rightValue. The symbols are uint16s because bzip2 encodes not only MTF indexes in the tree, but also two magic values for run-length encoding and an EOF symbol. Thus there are more than 256 possible symbols.
type huffmanNode struct {
left uint16
right uint16
leftValue uint16
rightValue uint16
}
huffmanSymbolLengthPair contains a symbol and its code length.
type huffmanSymbolLengthPair struct {
value uint16
length uint8
}
A huffmanTree is a binary tree which is navigated, bit-by-bit to reach a symbol.
type huffmanTree struct {
nodes []huffmanNode
nextNode int
}
A reader decompresses bzip2 compressed data.
type reader struct {
br bitReader
fileCRC uint32
blockCRC uint32
wantBlockCRC uint32
setupDone bool
eof bool
blockSize int
c [256]uint
tt []uint32
tPos uint32
preRLE []uint32
preRLEUsed int
lastByte int
byteRepeats uint
repeats uint
}
Decode reads bits from the given bitReader and navigates the tree until a symbol is found.
func (t *huffmanTree) Decode(br *bitReader) (v uint16)
func (m moveToFrontDecoder) Decode(n int) (b byte)
func (br *bitReader) Err() error
func (s StructuralError) Error() string
First returns the symbol at the front of the list.
func (m moveToFrontDecoder) First() byte
NewReader returns an io.Reader which decompresses bzip2 data from r. If r does not also implement [io.ByteReader], the decompressor may read more data than necessary from r.
func NewReader(r io.Reader) io.Reader
func (bz2 *reader) Read(buf []byte) (n int, err error)
func (br *bitReader) ReadBit() bool
func (br *bitReader) ReadBits(bits uint) (n int)
ReadBits64 reads the given number of bits and returns them in the least-significant part of a uint64. In the event of an error, it returns 0 and the error can be obtained by calling bitReader.Err().
func (br *bitReader) ReadBits64(bits uint) (n uint64)
buildHuffmanNode takes a slice of sorted huffmanCodes and builds a node in the Huffman tree at the given level. It returns the index of the newly constructed node.
func buildHuffmanNode(t *huffmanTree, codes []huffmanCode, level uint32) (nodeIndex uint16, err error)
func init()
inverseBWT implements the inverse Burrows-Wheeler transform as described in http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf, section 4.2. In that document, origPtr is called “I” and c is the “C” array after the first pass over the data. It's an argument here because we merge the first pass with the Huffman decoding. This also implements the “single array” method from the bzip2 source code which leaves the output, still shuffled, in the bottom 8 bits of tt with the index of the next byte in the top 24-bits. The index of the first byte is returned.
func inverseBWT(tt []uint32, origPtr uint, c []uint) uint32
newBitReader returns a new bitReader reading from r. If r is not already an io.ByteReader, it will be converted via a bufio.Reader.
func newBitReader(r io.Reader) bitReader
newHuffmanTree builds a Huffman tree from a slice containing the code lengths of each symbol. The maximum code length is 32 bits.
func newHuffmanTree(lengths []uint8) (huffmanTree, error)
newMTFDecoder creates a move-to-front decoder with an explicit initial list of symbols.
func newMTFDecoder(symbols []byte) moveToFrontDecoder
newMTFDecoderWithRange creates a move-to-front decoder with an initial symbol list of 0...n-1.
func newMTFDecoderWithRange(n int) moveToFrontDecoder
func (bz2 *reader) read(buf []byte) (int, error)
readBlock reads a bzip2 block. The magic number should already have been consumed.
func (bz2 *reader) readBlock() (err error)
func (bz2 *reader) readFromBlock(buf []byte) int
setup parses the bzip2 header.
func (bz2 *reader) setup(needMagic bool) error
updateCRC updates the crc value to incorporate the data in b. The initial value is 0.
func updateCRC(val uint32, b []byte) uint32
Generated with Arrow