Imports #
"io"
"math/bits"
"io"
"math/bits"
"encoding/binary"
"encoding/binary"
"math/bits"
"encoding/binary"
"errors"
"fmt"
"io"
"math/bits"
"io"
"math/bits"
"io"
"math/bits"
"encoding/binary"
"encoding/binary"
"math/bits"
"encoding/binary"
"errors"
"fmt"
"io"
"math/bits"
debug can be set in the source to print debug info using println.
const debug = falsefuzzing is a fuzzer hook set to true when fuzzing. This is used to reject cases where we don't match zstd.
var fuzzing = falsevar literalLengthBase = []uint32{...}const literalLengthOffset = 16var matchLengthBase = []uint32{...}const matchLengthOffset = 32maxHuffmanBits is the largest possible Huffman table bits.
const maxHuffmanBits = 11predefinedLiteralTable is the predefined table to use for literal lengths. Generated from table in RFC 3.1.1.3.2.2.1. Checked by TestPredefinedTables.
var predefinedLiteralTable = [...]fseBaselineEntry{...}predefinedMatchTable is the predefined table to use for match lengths. Generated from table in RFC 3.1.1.3.2.2.2. Checked by TestPredefinedTables.
var predefinedMatchTable = [...]fseBaselineEntry{...}predefinedOffsetTable is the predefined table to use for offsets. Generated from table in RFC 3.1.1.3.2.2.3. Checked by TestPredefinedTables.
var predefinedOffsetTable = [...]fseBaselineEntry{...}seqCodeInfo is the seqCodeInfoData for each kind of sequence code.
var seqCodeInfo = [3]seqCodeInfoData{...}const seqLiteral seqCode = iotaconst seqMatchconst seqOffsetconst xxhPrime64c1 = 0x9e3779b185ebca87const xxhPrime64c2 = 0xc2b2ae3d27d4eb4fconst xxhPrime64c3 = 0x165667b19e3779f9const xxhPrime64c4 = 0x85ebca77c2b2ae63const xxhPrime64c5 = 0x27d4eb2f165667c5block is the data for a single compressed block. The data starts immediately after the 3 byte block header, and is Block_Size bytes long.
type block []byteseqCode is the kind of sequence codes we have to handle.
type seqCode intReader implements [io.Reader] to read a zstd compressed stream.
type Reader struct {
r io.Reader
sawFrameHeader bool
hasChecksum bool
readOneFrame bool
frameSizeUnknown bool
remainingFrameSize uint64
blockOffset int64
buffer []byte
off int
repeatedOffset1 uint32
repeatedOffset2 uint32
repeatedOffset3 uint32
huffmanTable []uint16
huffmanTableBits int
window window
compressedBuf []byte
literals []byte
seqTables [3][]fseBaselineEntry
seqTableBits [3]uint8
seqTableBuffers [3][]fseBaselineEntry
scratch [16]byte
fseScratch []fseEntry
checksum xxhash64
}bitReader reads a bit stream going forward.
type bitReader struct {
r *Reader
data block
off uint32
bits uint32
cnt uint32
}fseBaselineEntry is an entry in an FSE baseline table. We use these for literal/match/length values. Those require mapping the symbol to a baseline value, and then reading zero or more bits and adding the value to the baseline. Rather than looking these up in separate tables, we convert the FSE table to an FSE baseline table.
type fseBaselineEntry struct {
baseline uint32
basebits uint8
bits uint8
base uint16
}fseEntry is one entry in an FSE table.
type fseEntry struct {
sym uint8
bits uint8
base uint16
}reverseBitReader reads a bit stream in reverse.
type reverseBitReader struct {
r *Reader
data block
off uint32
start uint32
bits uint32
cnt uint32
}seqCodeInfoData is the information needed to set up seqTables and seqTableBits for a particular kind of sequence code.
type seqCodeInfoData struct {
predefTable []fseBaselineEntry
predefTableBits int
maxSym int
maxBits int
toBaseline func(*Reader, int, []fseEntry, []fseBaselineEntry) error
}window stores up to size bytes of data. It is implemented as a circular buffer: sequential save calls append to the data slice until its length reaches the configured size. After that, save calls overwrite previously saved data at off and update off so that it always points at the oldest stored byte.
type window struct {
size int
data []byte
off int
}xxhash64 is the state of a xxHash-64 checksum.
type xxhash64 struct {
len uint64
v [4]uint64
buf [32]byte
cnt int
}zstdError is an error while decompressing.
type zstdError struct {
offset int64
err error
}func (ze *zstdError) Error() stringNewReader creates a new Reader that decompresses data from the given reader.
func NewReader(input io.Reader) *ReaderRead implements [io.Reader].
func (r *Reader) Read(p []byte) (int, error)ReadByte implements [io.ByteReader].
func (r *Reader) ReadByte() (byte, error)Reset discards the current state and starts reading a new stream from r. This permits reusing a Reader rather than allocating a new one.
func (r *Reader) Reset(input io.Reader)func (ze *zstdError) Unwrap() errorappendTo appends stored bytes between from and to indices to the buf. Index from must be less than or equal to index to, and to must be less than or equal to w.len().
func (w *window) appendTo(buf []byte, from uint32, to uint32) []bytebackup steps back to the last byte we used.
func (br *bitReader) backup()buildFSE builds an FSE decoding table from a list of probabilities. The probabilities are in norm. next is scratch space. The number of bits in the table is tableBits.
func (r *Reader) buildFSE(off int, norm []int16, table []fseEntry, tableBits int) errorcompressedBlock decompresses a compressed block, storing the decompressed data in r.buffer. The blockSize argument is the compressed size. RFC 3.1.1.3.
func (r *Reader) compressedBlock(blockSize int) errorCopy match bytes from the decoded output, or the window, at offset.
func (r *Reader) copyFromWindow(rbr *reverseBitReader, offset uint32, match uint32) errordigest returns the final hash value.
func (xh *xxhash64) digest() uint64execSeqs reads and executes the sequences. RFC 3.1.1.3.2.1.2.
func (r *Reader) execSeqs(data block, off int, litbuf []byte, seqCount int) errorfetch is called to ensure that at least b bits are available. It reports false if this can't be done, in which case only rbr.cnt bits are available.
func (rbr *reverseBitReader) fetch(b uint8) boolinitSeqs reads the Sequences_Section_Header and sets up the FSE tables used to read the sequence codes. It returns the number of sequences and the new offset. RFC 3.1.1.3.2.1.
func (r *Reader) initSeqs(data block, off int) (int, int, error)len returns the number of stored bytes.
func (w *window) len() uint32makeBitReader makes a bit reader starting at off.
func (r *Reader) makeBitReader(data block, off int) bitReaderfunc (r *Reader) makeEOFError(off int) errorfunc (r *Reader) makeError(off int, msg string) errormakeError returns an error at the current offset wrapping a string.
func (br *bitReader) makeError(msg string) errormakeError returns an error at the current offset wrapping a string.
func (rbr *reverseBitReader) makeError(msg string) errormakeLiteralBaselineFSE converts the literal length fseTable to baselineTable.
func (r *Reader) makeLiteralBaselineFSE(off int, fseTable []fseEntry, baselineTable []fseBaselineEntry) errormakeMatchBaselineFSE converts the match length fseTable to baselineTable.
func (r *Reader) makeMatchBaselineFSE(off int, fseTable []fseEntry, baselineTable []fseBaselineEntry) errormakeOffsetBaselineFSE converts the offset length fseTable to baselineTable.
func (r *Reader) makeOffsetBaselineFSE(off int, fseTable []fseEntry, baselineTable []fseBaselineEntry) errormakeReverseBitReader makes a reverseBitReader reading backward from off to start. The bitstream starts with a 1 bit in the last byte, at off.
func (r *Reader) makeReverseBitReader(data block, off int, start int) (reverseBitReader, error)mergeRound updates a value in the final round.
func (xh *xxhash64) mergeRound(v uint64, n uint64) uint64moreBits is called to read more bits. This ensures that at least 16 bits are available.
func (br *bitReader) moreBits() errorreadBlock reads the next block from a frame.
func (r *Reader) readBlock() errorreadFSE reads an FSE table from data starting at off.
maxSym is the maximum symbol value.
maxBits is the maximum number of bits permitted for symbols in the table.
The FSE is written into table, which must be at least 1<<maxBits in size.
This returns the number of bits in the FSE table and the new offset.
RFC 4.1.1.
func (r *Reader) readFSE(data block, off int, maxSym int, maxBits int, table []fseEntry) (tableBits int, roff int, err error)
readFrameHeader reads the frame header and prepares to read a block.
func (r *Reader) readFrameHeader() errorreadHuff reads a Huffman table from data starting at off into table. Each entry in a Huffman table is a pair of bytes. The high byte is the encoded value. The low byte is the number of bits used to encode that value. We index into the table with a value of size tableBits. A value that requires fewer bits appears in the table multiple times. This returns the number of bits in the Huffman table and the new offset. RFC 4.2.1.
func (r *Reader) readHuff(data block, off int, table []uint16) (tableBits int, roff int, err error)readHuffLiterals reads and decompresses a Compressed_Literals_Block or a Treeless_Literals_Block. RFC 3.1.1.3.1.4.
func (r *Reader) readHuffLiterals(data block, off int, hdr byte, outbuf []byte) (int, []byte, error)readLiterals reads and decompresses the literals from data at off. The literals are appended to outbuf, which is returned. Also returns the new input offset. RFC 3.1.1.3.1.
func (r *Reader) readLiterals(data block, off int, outbuf []byte) (int, []byte, error)readLiteralsFourStreams reads four interleaved streams of compressed literals.
func (r *Reader) readLiteralsFourStreams(data block, off int, totalStreamsSize int, regeneratedSize int, outbuf []byte) ([]byte, error)readLiteralsOneStream reads a single stream of compressed literals.
func (r *Reader) readLiteralsOneStream(data block, off int, compressedSize int, regeneratedSize int, outbuf []byte) ([]byte, error)readRawRLELiterals reads and decompresses a Raw_Literals_Block or a RLE_Literals_Block. RFC 3.1.1.3.1.1.
func (r *Reader) readRawRLELiterals(data block, off int, hdr byte, outbuf []byte) (int, []byte, error)refill reads and decompresses the next block.
func (r *Reader) refill() errorrefillIfNeeded reads the next block if necessary.
func (r *Reader) refillIfNeeded() errorreset clears stored data and configures window size.
func (w *window) reset(size int)reset discards the current state and prepares to compute a new hash. We assume a seed of 0 since that is what zstd uses.
func (xh *xxhash64) reset()round updates a value.
func (xh *xxhash64) round(v uint64, n uint64) uint64save stores up to the last size bytes from buf.
func (w *window) save(buf []byte)setBufferSize sets the decompressed buffer size. When this is called the buffer is empty.
func (r *Reader) setBufferSize(size int)setSeqTable uses the Compression_Mode in mode to set up r.seqTables and r.seqTableBits for kind. We store these in the Reader because one of the modes simply reuses the value from the last block in the frame.
func (r *Reader) setSeqTable(data block, off int, kind seqCode, mode byte) (int, error)skipFrame skips a skippable frame. RFC 3.1.2.
func (r *Reader) skipFrame() errorupdate adds a buffer to the hash.
func (xh *xxhash64) update(b []byte)val is called to fetch a value of b bits.
func (br *bitReader) val(b uint8) uint32val is called to fetch a value of b bits.
func (rbr *reverseBitReader) val(b uint8) (uint32, error)func (r *Reader) wrapError(off int, err error) errorfunc (r *Reader) wrapNonEOFError(off int, err error) errorGenerated with Arrow