Imports #
"io"
"math/bits"
"io"
"math/bits"
"encoding/binary"
"encoding/binary"
"math/bits"
"encoding/binary"
"errors"
"fmt"
"io"
"math/bits"
"io"
"math/bits"
"io"
"math/bits"
"encoding/binary"
"encoding/binary"
"math/bits"
"encoding/binary"
"errors"
"fmt"
"io"
"math/bits"
debug can be set in the source to print debug info using println.
const debug = false
fuzzing is a fuzzer hook set to true when fuzzing. This is used to reject cases where we don't match zstd.
var fuzzing = false
var literalLengthBase = []uint32{...}
const literalLengthOffset = 16
var matchLengthBase = []uint32{...}
const matchLengthOffset = 32
maxHuffmanBits is the largest possible Huffman table bits.
const maxHuffmanBits = 11
predefinedLiteralTable is the predefined table to use for literal lengths. Generated from table in RFC 3.1.1.3.2.2.1. Checked by TestPredefinedTables.
var predefinedLiteralTable = [...]fseBaselineEntry{...}
predefinedMatchTable is the predefined table to use for match lengths. Generated from table in RFC 3.1.1.3.2.2.2. Checked by TestPredefinedTables.
var predefinedMatchTable = [...]fseBaselineEntry{...}
predefinedOffsetTable is the predefined table to use for offsets. Generated from table in RFC 3.1.1.3.2.2.3. Checked by TestPredefinedTables.
var predefinedOffsetTable = [...]fseBaselineEntry{...}
seqCodeInfo is the seqCodeInfoData for each kind of sequence code.
var seqCodeInfo = [3]seqCodeInfoData{...}
const seqLiteral seqCode = iota
const seqMatch
const seqOffset
const xxhPrime64c1 = 0x9e3779b185ebca87
const xxhPrime64c2 = 0xc2b2ae3d27d4eb4f
const xxhPrime64c3 = 0x165667b19e3779f9
const xxhPrime64c4 = 0x85ebca77c2b2ae63
const xxhPrime64c5 = 0x27d4eb2f165667c5
block is the data for a single compressed block. The data starts immediately after the 3 byte block header, and is Block_Size bytes long.
type block []byte
seqCode is the kind of sequence codes we have to handle.
type seqCode int
Reader implements [io.Reader] to read a zstd compressed stream.
type Reader struct {
r io.Reader
sawFrameHeader bool
hasChecksum bool
readOneFrame bool
frameSizeUnknown bool
remainingFrameSize uint64
blockOffset int64
buffer []byte
off int
repeatedOffset1 uint32
repeatedOffset2 uint32
repeatedOffset3 uint32
huffmanTable []uint16
huffmanTableBits int
window window
compressedBuf []byte
literals []byte
seqTables [3][]fseBaselineEntry
seqTableBits [3]uint8
seqTableBuffers [3][]fseBaselineEntry
scratch [16]byte
fseScratch []fseEntry
checksum xxhash64
}
bitReader reads a bit stream going forward.
type bitReader struct {
r *Reader
data block
off uint32
bits uint32
cnt uint32
}
fseBaselineEntry is an entry in an FSE baseline table. We use these for literal/match/length values. Those require mapping the symbol to a baseline value, and then reading zero or more bits and adding the value to the baseline. Rather than looking these up in separate tables, we convert the FSE table to an FSE baseline table.
type fseBaselineEntry struct {
baseline uint32
basebits uint8
bits uint8
base uint16
}
fseEntry is one entry in an FSE table.
type fseEntry struct {
sym uint8
bits uint8
base uint16
}
reverseBitReader reads a bit stream in reverse.
type reverseBitReader struct {
r *Reader
data block
off uint32
start uint32
bits uint32
cnt uint32
}
seqCodeInfoData is the information needed to set up seqTables and seqTableBits for a particular kind of sequence code.
type seqCodeInfoData struct {
predefTable []fseBaselineEntry
predefTableBits int
maxSym int
maxBits int
toBaseline func(*Reader, int, []fseEntry, []fseBaselineEntry) error
}
window stores up to size bytes of data. It is implemented as a circular buffer: sequential save calls append to the data slice until its length reaches the configured size; after that, save calls overwrite previously saved data at off and update off so that it always points at the oldest stored byte.
type window struct {
size int
data []byte
off int
}
xxhash64 is the state of a xxHash-64 checksum.
type xxhash64 struct {
len uint64
v [4]uint64
buf [32]byte
cnt int
}
zstdError is an error while decompressing.
type zstdError struct {
offset int64
err error
}
func (ze *zstdError) Error() string
NewReader creates a new Reader that decompresses data from the given reader.
func NewReader(input io.Reader) *Reader
Read implements [io.Reader].
func (r *Reader) Read(p []byte) (int, error)
ReadByte implements [io.ByteReader].
func (r *Reader) ReadByte() (byte, error)
Reset discards the current state and starts reading a new stream from r. This permits reusing a Reader rather than allocating a new one.
func (r *Reader) Reset(input io.Reader)
func (ze *zstdError) Unwrap() error
appendTo appends the stored bytes between indices from and to to buf. Index from must be less than or equal to index to, and to must be less than or equal to w.len().
func (w *window) appendTo(buf []byte, from uint32, to uint32) []byte
backup steps back to the last byte we used.
func (br *bitReader) backup()
buildFSE builds an FSE decoding table from a list of probabilities. The probabilities are in norm and the decoding table is stored in table. The number of bits in the table is tableBits.
func (r *Reader) buildFSE(off int, norm []int16, table []fseEntry, tableBits int) error
compressedBlock decompresses a compressed block, storing the decompressed data in r.buffer. The blockSize argument is the compressed size. RFC 3.1.1.3.
func (r *Reader) compressedBlock(blockSize int) error
copyFromWindow copies match bytes from the decoded output, or the window, at offset.
func (r *Reader) copyFromWindow(rbr *reverseBitReader, offset uint32, match uint32) error
digest returns the final hash value.
func (xh *xxhash64) digest() uint64
execSeqs reads and executes the sequences. RFC 3.1.1.3.2.1.2.
func (r *Reader) execSeqs(data block, off int, litbuf []byte, seqCount int) error
fetch is called to ensure that at least b bits are available. It reports false if this can't be done, in which case only rbr.cnt bits are available.
func (rbr *reverseBitReader) fetch(b uint8) bool
initSeqs reads the Sequences_Section_Header and sets up the FSE tables used to read the sequence codes. It returns the number of sequences and the new offset. RFC 3.1.1.3.2.1.
func (r *Reader) initSeqs(data block, off int) (int, int, error)
len returns the number of stored bytes.
func (w *window) len() uint32
makeBitReader makes a bit reader starting at off.
func (r *Reader) makeBitReader(data block, off int) bitReader
func (r *Reader) makeEOFError(off int) error
func (r *Reader) makeError(off int, msg string) error
makeError returns an error at the current offset wrapping a string.
func (br *bitReader) makeError(msg string) error
makeError returns an error at the current offset wrapping a string.
func (rbr *reverseBitReader) makeError(msg string) error
makeLiteralBaselineFSE converts the literal length fseTable to baselineTable.
func (r *Reader) makeLiteralBaselineFSE(off int, fseTable []fseEntry, baselineTable []fseBaselineEntry) error
makeMatchBaselineFSE converts the match length fseTable to baselineTable.
func (r *Reader) makeMatchBaselineFSE(off int, fseTable []fseEntry, baselineTable []fseBaselineEntry) error
makeOffsetBaselineFSE converts the offset fseTable to baselineTable.
func (r *Reader) makeOffsetBaselineFSE(off int, fseTable []fseEntry, baselineTable []fseBaselineEntry) error
makeReverseBitReader makes a reverseBitReader reading backward from off to start. The bitstream starts with a 1 bit in the last byte, at off.
func (r *Reader) makeReverseBitReader(data block, off int, start int) (reverseBitReader, error)
mergeRound updates a value in the final round.
func (xh *xxhash64) mergeRound(v uint64, n uint64) uint64
moreBits is called to read more bits. This ensures that at least 16 bits are available.
func (br *bitReader) moreBits() error
readBlock reads the next block from a frame.
func (r *Reader) readBlock() error
readFSE reads an FSE table from data starting at off.
maxSym is the maximum symbol value.
maxBits is the maximum number of bits permitted for symbols in the table.
The FSE is written into table, which must be at least 1<<maxBits in size.
This returns the number of bits in the FSE table and the new offset.
RFC 4.1.1.
func (r *Reader) readFSE(data block, off int, maxSym int, maxBits int, table []fseEntry) (tableBits int, roff int, err error)
readFrameHeader reads the frame header and prepares to read a block.
func (r *Reader) readFrameHeader() error
readHuff reads a Huffman table from data starting at off into table. Each entry in a Huffman table is a pair of bytes. The high byte is the encoded value. The low byte is the number of bits used to encode that value. We index into the table with a value of size tableBits. A value that requires fewer bits appears in the table multiple times. This returns the number of bits in the Huffman table and the new offset. RFC 4.2.1.
func (r *Reader) readHuff(data block, off int, table []uint16) (tableBits int, roff int, err error)
readHuffLiterals reads and decompresses a Compressed_Literals_Block or a Treeless_Literals_Block. RFC 3.1.1.3.1.4.
func (r *Reader) readHuffLiterals(data block, off int, hdr byte, outbuf []byte) (int, []byte, error)
readLiterals reads and decompresses the literals from data at off. The literals are appended to outbuf, which is returned. Also returns the new input offset. RFC 3.1.1.3.1.
func (r *Reader) readLiterals(data block, off int, outbuf []byte) (int, []byte, error)
readLiteralsFourStreams reads four interleaved streams of compressed literals.
func (r *Reader) readLiteralsFourStreams(data block, off int, totalStreamsSize int, regeneratedSize int, outbuf []byte) ([]byte, error)
readLiteralsOneStream reads a single stream of compressed literals.
func (r *Reader) readLiteralsOneStream(data block, off int, compressedSize int, regeneratedSize int, outbuf []byte) ([]byte, error)
readRawRLELiterals reads and decompresses a Raw_Literals_Block or a RLE_Literals_Block. RFC 3.1.1.3.1.1.
func (r *Reader) readRawRLELiterals(data block, off int, hdr byte, outbuf []byte) (int, []byte, error)
refill reads and decompresses the next block.
func (r *Reader) refill() error
refillIfNeeded reads the next block if necessary.
func (r *Reader) refillIfNeeded() error
reset clears stored data and configures window size.
func (w *window) reset(size int)
reset discards the current state and prepares to compute a new hash. We assume a seed of 0 since that is what zstd uses.
func (xh *xxhash64) reset()
round updates a value.
func (xh *xxhash64) round(v uint64, n uint64) uint64
save stores up to the last size bytes from buf.
func (w *window) save(buf []byte)
setBufferSize sets the decompressed buffer size. When this is called the buffer is empty.
func (r *Reader) setBufferSize(size int)
setSeqTable uses the Compression_Mode in mode to set up r.seqTables and r.seqTableBits for kind. We store these in the Reader because one of the modes simply reuses the value from the last block in the frame.
func (r *Reader) setSeqTable(data block, off int, kind seqCode, mode byte) (int, error)
skipFrame skips a skippable frame. RFC 3.1.2.
func (r *Reader) skipFrame() error
update adds a buffer to the hash.
func (xh *xxhash64) update(b []byte)
val is called to fetch a value of b bits.
func (br *bitReader) val(b uint8) uint32
val is called to fetch a value of b bits.
func (rbr *reverseBitReader) val(b uint8) (uint32, error)
func (r *Reader) wrapError(off int, err error) error
func (r *Reader) wrapNonEOFError(off int, err error) error
Generated with Arrow