zstd

Imports

Imports #

"io"
"math/bits"
"io"
"math/bits"
"encoding/binary"
"encoding/binary"
"math/bits"
"encoding/binary"
"errors"
"fmt"
"io"
"math/bits"

Constants & Variables

debug const #

debug can be set in the source to print debug info using println.

const debug = false

fuzzing var #

fuzzing is a fuzzer hook set to true when fuzzing. This is used to reject cases where we don't match zstd.

var fuzzing = false

literalLengthBase var #

var literalLengthBase = []uint32{...}

literalLengthOffset const #

const literalLengthOffset = 16

matchLengthBase var #

var matchLengthBase = []uint32{...}

matchLengthOffset const #

const matchLengthOffset = 32

maxHuffmanBits const #

maxHuffmanBits is the largest possible Huffman table bits.

const maxHuffmanBits = 11

predefinedLiteralTable var #

predefinedLiteralTable is the predefined table to use for literal lengths. Generated from table in RFC 3.1.1.3.2.2.1. Checked by TestPredefinedTables.

var predefinedLiteralTable = [...]fseBaselineEntry{...}

predefinedMatchTable var #

predefinedMatchTable is the predefined table to use for match lengths. Generated from table in RFC 3.1.1.3.2.2.2. Checked by TestPredefinedTables.

var predefinedMatchTable = [...]fseBaselineEntry{...}

predefinedOffsetTable var #

predefinedOffsetTable is the predefined table to use for offsets. Generated from table in RFC 3.1.1.3.2.2.3. Checked by TestPredefinedTables.

var predefinedOffsetTable = [...]fseBaselineEntry{...}

seqCodeInfo var #

seqCodeInfo is the seqCodeInfoData for each kind of sequence code.

var seqCodeInfo = [3]seqCodeInfoData{...}

seqLiteral const #

const seqLiteral seqCode = iota

seqMatch const #

const seqMatch

seqOffset const #

const seqOffset

xxhPrime64c1 const #

const xxhPrime64c1 = 0x9e3779b185ebca87

xxhPrime64c2 const #

const xxhPrime64c2 = 0xc2b2ae3d27d4eb4f

xxhPrime64c3 const #

const xxhPrime64c3 = 0x165667b19e3779f9

xxhPrime64c4 const #

const xxhPrime64c4 = 0x85ebca77c2b2ae63

xxhPrime64c5 const #

const xxhPrime64c5 = 0x27d4eb2f165667c5

Type Aliases

block type #

block is the data for a single compressed block. The data starts immediately after the 3 byte block header, and is Block_Size bytes long.

type block []byte

seqCode type #

seqCode is the kind of sequence codes we have to handle.

type seqCode int

Structs

Reader struct #

Reader implements [io.Reader] to read a zstd compressed stream.

type Reader struct {
r io.Reader
sawFrameHeader bool
hasChecksum bool
readOneFrame bool
frameSizeUnknown bool
remainingFrameSize uint64
blockOffset int64
buffer []byte
off int
repeatedOffset1 uint32
repeatedOffset2 uint32
repeatedOffset3 uint32
huffmanTable []uint16
huffmanTableBits int
window window
compressedBuf []byte
literals []byte
seqTables [3][]fseBaselineEntry
seqTableBits [3]uint8
seqTableBuffers [3][]fseBaselineEntry
scratch [16]byte
fseScratch []fseEntry
checksum xxhash64
}

bitReader struct #

bitReader reads a bit stream going forward.

type bitReader struct {
r *Reader
data block
off uint32
bits uint32
cnt uint32
}

fseBaselineEntry struct #

fseBaselineEntry is an entry in an FSE baseline table. We use these for literal/match/length values. Those require mapping the symbol to a baseline value, and then reading zero or more bits and adding the value to the baseline. Rather than looking these up in separate tables, we convert the FSE table to an FSE baseline table.

type fseBaselineEntry struct {
baseline uint32
basebits uint8
bits uint8
base uint16
}

fseEntry struct #

fseEntry is one entry in an FSE table.

type fseEntry struct {
sym uint8
bits uint8
base uint16
}

reverseBitReader struct #

reverseBitReader reads a bit stream in reverse.

type reverseBitReader struct {
r *Reader
data block
off uint32
start uint32
bits uint32
cnt uint32
}

seqCodeInfoData struct #

seqCodeInfoData is the information needed to set up seqTables and seqTableBits for a particular kind of sequence code.

type seqCodeInfoData struct {
predefTable []fseBaselineEntry
predefTableBits int
maxSym int
maxBits int
toBaseline func(*Reader, int, []fseEntry, []fseBaselineEntry) error
}

window struct #

window stores up to size bytes of data. It is implemented as a circular buffer: sequential save calls append to the data slice until its length reaches configured size and after that, save calls overwrite previously saved data at off and update off such that it always points at the byte stored before others.

type window struct {
size int
data []byte
off int
}

xxhash64 struct #

xxhash64 is the state of a xxHash-64 checksum.

type xxhash64 struct {
len uint64
v [4]uint64
buf [32]byte
cnt int
}

zstdError struct #

zstdError is an error while decompressing.

type zstdError struct {
offset int64
err error
}

Functions

Error method #

func (ze *zstdError) Error() string

NewReader function #

NewReader creates a new Reader that decompresses data from the given reader.

func NewReader(input io.Reader) *Reader

Read method #

Read implements [io.Reader].

func (r *Reader) Read(p []byte) (int, error)

ReadByte method #

ReadByte implements [io.ByteReader].

func (r *Reader) ReadByte() (byte, error)

Reset method #

Reset discards the current state and starts reading a new stream from r. This permits reusing a Reader rather than allocating a new one.

func (r *Reader) Reset(input io.Reader)

Unwrap method #

func (ze *zstdError) Unwrap() error

appendTo method #

appendTo appends the stored bytes between the from and to indices to buf. Index from must be less than or equal to index to, and to must be less than or equal to w.len().

func (w *window) appendTo(buf []byte, from uint32, to uint32) []byte

backup method #

backup steps back to the last byte we used.

func (br *bitReader) backup()

buildFSE method #

buildFSE builds an FSE decoding table from a list of probabilities. The probabilities are in norm. next is scratch space. The number of bits in the table is tableBits.

func (r *Reader) buildFSE(off int, norm []int16, table []fseEntry, tableBits int) error

compressedBlock method #

compressedBlock decompresses a compressed block, storing the decompressed data in r.buffer. The blockSize argument is the compressed size. RFC 3.1.1.3.

func (r *Reader) compressedBlock(blockSize int) error

copyFromWindow method #

Copy match bytes from the decoded output, or the window, at offset.

func (r *Reader) copyFromWindow(rbr *reverseBitReader, offset uint32, match uint32) error

digest method #

digest returns the final hash value.

func (xh *xxhash64) digest() uint64

execSeqs method #

execSeqs reads and executes the sequences. RFC 3.1.1.3.2.1.2.

func (r *Reader) execSeqs(data block, off int, litbuf []byte, seqCount int) error

fetch method #

fetch is called to ensure that at least b bits are available. It reports false if this can't be done, in which case only rbr.cnt bits are available.

func (rbr *reverseBitReader) fetch(b uint8) bool

initSeqs method #

initSeqs reads the Sequences_Section_Header and sets up the FSE tables used to read the sequence codes. It returns the number of sequences and the new offset. RFC 3.1.1.3.2.1.

func (r *Reader) initSeqs(data block, off int) (int, int, error)

len method #

len returns the number of stored bytes.

func (w *window) len() uint32

makeBitReader method #

makeBitReader makes a bit reader starting at off.

func (r *Reader) makeBitReader(data block, off int) bitReader

makeEOFError method #

func (r *Reader) makeEOFError(off int) error

makeError method #

func (r *Reader) makeError(off int, msg string) error

makeError method #

makeError returns an error at the current offset wrapping a string.

func (br *bitReader) makeError(msg string) error

makeError method #

makeError returns an error at the current offset wrapping a string.

func (rbr *reverseBitReader) makeError(msg string) error

makeLiteralBaselineFSE method #

makeLiteralBaselineFSE converts the literal length fseTable to baselineTable.

func (r *Reader) makeLiteralBaselineFSE(off int, fseTable []fseEntry, baselineTable []fseBaselineEntry) error

makeMatchBaselineFSE method #

makeMatchBaselineFSE converts the match length fseTable to baselineTable.

func (r *Reader) makeMatchBaselineFSE(off int, fseTable []fseEntry, baselineTable []fseBaselineEntry) error

makeOffsetBaselineFSE method #

makeOffsetBaselineFSE converts the offset length fseTable to baselineTable.

func (r *Reader) makeOffsetBaselineFSE(off int, fseTable []fseEntry, baselineTable []fseBaselineEntry) error

makeReverseBitReader method #

makeReverseBitReader makes a reverseBitReader reading backward from off to start. The bitstream starts with a 1 bit in the last byte, at off.

func (r *Reader) makeReverseBitReader(data block, off int, start int) (reverseBitReader, error)

mergeRound method #

mergeRound updates a value in the final round.

func (xh *xxhash64) mergeRound(v uint64, n uint64) uint64

moreBits method #

moreBits is called to read more bits. This ensures that at least 16 bits are available.

func (br *bitReader) moreBits() error

readBlock method #

readBlock reads the next block from a frame.

func (r *Reader) readBlock() error

readFSE method #

readFSE reads an FSE table from data starting at off. maxSym is the maximum symbol value. maxBits is the maximum number of bits permitted for symbols in the table. The FSE is written into table, which must be at least 1<<maxBits in size. This returns the number of bits in the FSE table and the new offset.

func (r *Reader) readFSE(data block, off int, maxSym int, maxBits int, table []fseEntry) (tableBits int, roff int, err error)

readFrameHeader method #

readFrameHeader reads the frame header and prepares to read a block.

func (r *Reader) readFrameHeader() error

readHuff method #

readHuff reads a Huffman table from data starting at off into table. Each entry in a Huffman table is a pair of bytes. The high byte is the encoded value. The low byte is the number of bits used to encode that value. We index into the table with a value of size tableBits. A value that requires fewer bits appears in the table multiple times. This returns the number of bits in the Huffman table and the new offset. RFC 4.2.1.

func (r *Reader) readHuff(data block, off int, table []uint16) (tableBits int, roff int, err error)

readHuffLiterals method #

readHuffLiterals reads and decompresses a Compressed_Literals_Block or a Treeless_Literals_Block. RFC 3.1.1.3.1.4.

func (r *Reader) readHuffLiterals(data block, off int, hdr byte, outbuf []byte) (int, []byte, error)

readLiterals method #

readLiterals reads and decompresses the literals from data at off. The literals are appended to outbuf, which is returned. Also returns the new input offset. RFC 3.1.1.3.1.

func (r *Reader) readLiterals(data block, off int, outbuf []byte) (int, []byte, error)

readLiteralsFourStreams method #

readLiteralsFourStreams reads four interleaved streams of compressed literals.

func (r *Reader) readLiteralsFourStreams(data block, off int, totalStreamsSize int, regeneratedSize int, outbuf []byte) ([]byte, error)

readLiteralsOneStream method #

readLiteralsOneStream reads a single stream of compressed literals.

func (r *Reader) readLiteralsOneStream(data block, off int, compressedSize int, regeneratedSize int, outbuf []byte) ([]byte, error)

readRawRLELiterals method #

readRawRLELiterals reads and decompresses a Raw_Literals_Block or a RLE_Literals_Block. RFC 3.1.1.3.1.1.

func (r *Reader) readRawRLELiterals(data block, off int, hdr byte, outbuf []byte) (int, []byte, error)

refill method #

refill reads and decompresses the next block.

func (r *Reader) refill() error

refillIfNeeded method #

refillIfNeeded reads the next block if necessary.

func (r *Reader) refillIfNeeded() error

reset method #

reset clears stored data and configures window size.

func (w *window) reset(size int)

reset method #

reset discards the current state and prepares to compute a new hash. We assume a seed of 0 since that is what zstd uses.

func (xh *xxhash64) reset()

round method #

round updates a value.

func (xh *xxhash64) round(v uint64, n uint64) uint64

save method #

save stores up to the last size bytes of buf.

func (w *window) save(buf []byte)

setBufferSize method #

setBufferSize sets the decompressed buffer size. When this is called the buffer is empty.

func (r *Reader) setBufferSize(size int)

setSeqTable method #

setSeqTable uses the Compression_Mode in mode to set up r.seqTables and r.seqTableBits for kind. We store these in the Reader because one of the modes simply reuses the value from the last block in the frame.

func (r *Reader) setSeqTable(data block, off int, kind seqCode, mode byte) (int, error)

skipFrame method #

skipFrame skips a skippable frame. RFC 3.1.2.

func (r *Reader) skipFrame() error

update method #

update adds a buffer to the hash.

func (xh *xxhash64) update(b []byte)

val method #

val is called to fetch a value of b bits.

func (br *bitReader) val(b uint8) uint32

val method #

val is called to fetch a value of b bits.

func (rbr *reverseBitReader) val(b uint8) (uint32, error)

wrapError method #

func (r *Reader) wrapError(off int, err error) error

wrapNonEOFError method #

func (r *Reader) wrapNonEOFError(off int, err error) error

Generated with Arrow