Imports #
"unicode/utf8"
"golang.org/x/text/transform"
"sync"
"sync"
"sync"
"sync"
"unicode/utf8"
"golang.org/x/text/transform"
"encoding/binary"
"fmt"
"unicode/utf8"
"io"
"sync"
"sync"
"unicode/utf8"
"unicode/utf8"
"unicode/utf8"
"golang.org/x/text/transform"
"sync"
"sync"
"sync"
"sync"
"unicode/utf8"
"golang.org/x/text/transform"
"encoding/binary"
"fmt"
"unicode/utf8"
"io"
"sync"
"sync"
"unicode/utf8"
"unicode/utf8"
GraphemeJoiner is inserted after maxNonStarters non-starter runes.
const GraphemeJoiner = "\u034F"MaxSegmentSize is the maximum size of a byte buffer needed to consider any sequence of starter and non-starter runes for the purpose of normalization.
const MaxSegmentSize = maxByteBufferSizeMaxTransformChunkSize indicates the maximum number of bytes that Transform may need to write atomically for any Form. Making a destination buffer at least this size ensures that Transform can always make progress and that the user does not need to grow the buffer on an ErrShortDst.
const MaxTransformChunkSize = *ast.BinaryExprMaxTransformChunkSize indicates the maximum number of bytes that Transform may need to write atomically for any Form. Making a destination buffer at least this size ensures that Transform can always make progress and that the user does not need to grow the buffer on an ErrShortDst.
const MaxTransformChunkSize = *ast.BinaryExprMaxTransformChunkSize indicates the maximum number of bytes that Transform may need to write atomically for any Form. Making a destination buffer at least this size ensures that Transform can always make progress and that the user does not need to grow the buffer on an ErrShortDst.
const MaxTransformChunkSize = *ast.BinaryExprMaxTransformChunkSize indicates the maximum number of bytes that Transform may need to write atomically for any Form. Making a destination buffer at least this size ensures that Transform can always make progress and that the user does not need to grow the buffer on an ErrShortDst.
const MaxTransformChunkSize = *ast.BinaryExprMaxTransformChunkSize indicates the maximum number of bytes that Transform may need to write atomically for any Form. Making a destination buffer at least this size ensures that Transform can always make progress and that the user does not need to grow the buffer on an ErrShortDst.
const MaxTransformChunkSize = *ast.BinaryExprMaxTransformChunkSize indicates the maximum number of bytes that Transform may need to write atomically for any Form. Making a destination buffer at least this size ensures that Transform can always make progress and that the user does not need to grow the buffer on an ErrShortDst.
const MaxTransformChunkSize = *ast.BinaryExprconst NFC Form = iotaconst NFDconst NFKCconst NFKDVersion is the Unicode edition from which the tables are derived.
const Version = "12.0.0"Version is the Unicode edition from which the tables are derived.
const Version = "11.0.0"Version is the Unicode edition from which the tables are derived.
const Version = "10.0.0"Version is the Unicode edition from which the tables are derived.
const Version = "13.0.0"Version is the Unicode edition from which the tables are derived.
const Version = "15.0.0"Version is the Unicode edition from which the tables are derived.
const Version = "9.0.0"var ccc = [55]uint8{...}var ccc = [56]uint8{...}var ccc = [55]uint8{...}var ccc = [55]uint8{...}var ccc = [56]uint8{...}var ccc = [55]uint8{...}decomps: 19128 bytes
var decomps = [...]byte{...}decomps: 19105 bytes
var decomps = [...]byte{...}decomps: 19426 bytes
var decomps = [...]byte{...}decomps: 19105 bytes
var decomps = [...]byte{...}decomps: 19108 bytes
var decomps = [...]byte{...}decomps: 19105 bytes
var decomps = [...]byte{...}const endMulti = 0x2F77const endMulti = 0x2F60const endMulti = 0x2F60const endMulti = 0x30A1const endMulti = 0x2F63const endMulti = 0x2F60var errs = []error{...}const firstCCC = 0x2C9Econst firstCCC = 0x2DD5const firstCCC = 0x2C9Econst firstCCC = 0x2CABconst firstCCC = 0x2CA1const firstCCC = 0x2C9Econst firstCCCZeroExcept = 0x4A78const firstCCCZeroExcept = 0x4A78const firstCCCZeroExcept = 0x4A7Bconst firstCCCZeroExcept = 0x4A78const firstCCCZeroExcept = 0x4A8Fconst firstCCCZeroExcept = 0x4BB9const firstLeadingCCC = 0x49AEconst firstLeadingCCC = 0x49AEconst firstLeadingCCC = 0x4AEFconst firstLeadingCCC = 0x49AEconst firstLeadingCCC = 0x49C5const firstLeadingCCC = 0x49B1const firstMulti = 0x199Aconst firstMulti = 0x186Dconst firstMulti = 0x186Dconst firstMulti = 0x1870const firstMulti = 0x186Dconst firstMulti = 0x186Dconst firstStarterWithNLead = 0x4A9Fconst firstStarterWithNLead = 0x4A9Fconst firstStarterWithNLead = 0x4BE0const firstStarterWithNLead = 0x4A9Fconst firstStarterWithNLead = 0x4AB6const firstStarterWithNLead = 0x4AA2var formTable = []*formInfo{...}For Hangul we combine algorithmically, instead of using tables.
const hangulBase = 0xAC00For Hangul we combine algorithmically, instead of using tables.
const hangulBase0 = 0xEAFor Hangul we combine algorithmically, instead of using tables.
const hangulBase1 = 0xB0For Hangul we combine algorithmically, instead of using tables.
const hangulBase2 = 0x80For Hangul we combine algorithmically, instead of using tables.
const hangulEnd = *ast.BinaryExprFor Hangul we combine algorithmically, instead of using tables.
const hangulEnd0 = 0xEDFor Hangul we combine algorithmically, instead of using tables.
const hangulEnd1 = 0x9EFor Hangul we combine algorithmically, instead of using tables.
const hangulEnd2 = 0xA4const hangulUTF8Size = 3const headerFlagsMask = 0xC0const headerLenMask = 0x3Fconst iShortDstconst iShortSrcconst iSuccess insertErr = *ast.UnaryExprFor Hangul we combine algorithmically, instead of using tables.
const jamoLBase = 0x1100For Hangul we combine algorithmically, instead of using tables.
const jamoLBase0 = 0xE1For Hangul we combine algorithmically, instead of using tables.
const jamoLBase1 = 0x84For Hangul we combine algorithmically, instead of using tables.
const jamoLEnd = 0x1113For Hangul we combine algorithmically, instead of using tables.
const jamoLVTCount = *ast.BinaryExprFor Hangul we combine algorithmically, instead of using tables.
const jamoTBase = 0x11A7For Hangul we combine algorithmically, instead of using tables.
const jamoTCount = 28For Hangul we combine algorithmically, instead of using tables.
const jamoTEnd = 0x11C3For Hangul we combine algorithmically, instead of using tables.
const jamoVBase = 0x1161For Hangul we combine algorithmically, instead of using tables.
const jamoVCount = 21For Hangul we combine algorithmically, instead of using tables.
const jamoVEnd = 0x1176For Hangul we combine algorithmically, instead of using tables.
const jamoVTCount = *ast.BinaryExprconst lastDecomp = 0x4AA4const lastDecomp = 0x4AA1const lastDecomp = 0x4AA1const lastDecomp = 0x4AB8const lastDecomp = 0x4BE2const lastDecomp = 0x4AA1The maximum number of characters needed for a buffer is maxNonStarters + 1 for the starter + 1 for the CGJ
const maxBufferSize = *ast.BinaryExprconst maxByteBufferSize = *ast.BinaryExprconst maxDecomp = 0x8000const maxDecomp = 0x8000const maxDecomp = 0x8000const maxDecomp = 0x8000const maxDecomp = 0x8000const maxDecomp = 0x8000const maxNFCExpansion = 3const maxNFKCExpansion = 18const maxNonStarters = 30var nfcData = *ast.CallExprnfcIndex: 22 blocks, 1408 entries, 1408 bytes Block 0 is the zero block.
var nfcIndex = [1408]uint8{...}nfcIndex: 22 blocks, 1408 entries, 1408 bytes Block 0 is the zero block.
var nfcIndex = [1408]uint8{...}nfcIndex: 22 blocks, 1408 entries, 1408 bytes Block 0 is the zero block.
var nfcIndex = [1408]uint8{...}nfcIndex: 22 blocks, 1408 entries, 1408 bytes Block 0 is the zero block.
var nfcIndex = [1408]uint8{...}nfcIndex: 22 blocks, 1408 entries, 1408 bytes Block 0 is the zero block.
var nfcIndex = [1408]uint8{...}nfcIndex: 22 blocks, 1408 entries, 1408 bytes Block 0 is the zero block.
var nfcIndex = [1408]uint8{...}var nfcSparse = sparseBlocks{...}nfcSparseOffset: 151 entries, 302 bytes
var nfcSparseOffset = []uint16{...}nfcSparseOffset: 142 entries, 284 bytes
var nfcSparseOffset = []uint16{...}nfcSparseOffset: 163 entries, 326 bytes
var nfcSparseOffset = []uint16{...}nfcSparseOffset: 149 entries, 298 bytes
var nfcSparseOffset = []uint16{...}nfcSparseOffset: 156 entries, 312 bytes
var nfcSparseOffset = []uint16{...}nfcSparseOffset: 145 entries, 290 bytes
var nfcSparseOffset = []uint16{...}nfcSparseValues: 684 entries, 2736 bytes
var nfcSparseValues = [684]valueRange{...}nfcSparseValues: 689 entries, 2756 bytes
var nfcSparseValues = [689]valueRange{...}nfcSparseValues: 688 entries, 2752 bytes
var nfcSparseValues = [688]valueRange{...}nfcSparseValues: 730 entries, 2920 bytes
var nfcSparseValues = [730]valueRange{...}nfcSparseValues: 704 entries, 2816 bytes
var nfcSparseValues = [704]valueRange{...}nfcSparseValues: 682 entries, 2728 bytes
var nfcSparseValues = [682]valueRange{...}nfcValues: 48 blocks, 3072 entries, 6144 bytes The third block is the zero block.
var nfcValues = [3072]uint16{...}nfcValues: 46 blocks, 2944 entries, 5888 bytes The third block is the zero block.
var nfcValues = [2944]uint16{...}nfcValues: 47 blocks, 3008 entries, 6016 bytes The third block is the zero block.
var nfcValues = [3008]uint16{...}nfcValues: 48 blocks, 3072 entries, 6144 bytes The third block is the zero block.
var nfcValues = [3072]uint16{...}nfcValues: 48 blocks, 3072 entries, 6144 bytes The third block is the zero block.
var nfcValues = [3072]uint16{...}nfcValues: 48 blocks, 3072 entries, 6144 bytes The third block is the zero block.
var nfcValues = [3072]uint16{...}var nfkcData = *ast.CallExprnfkcIndex: 22 blocks, 1408 entries, 1408 bytes Block 0 is the zero block.
var nfkcIndex = [1408]uint8{...}nfkcIndex: 22 blocks, 1408 entries, 2816 bytes Block 0 is the zero block.
var nfkcIndex = [1408]uint16{...}nfkcIndex: 22 blocks, 1408 entries, 1408 bytes Block 0 is the zero block.
var nfkcIndex = [1408]uint8{...}nfkcIndex: 22 blocks, 1408 entries, 2816 bytes Block 0 is the zero block.
var nfkcIndex = [1408]uint16{...}nfkcIndex: 22 blocks, 1408 entries, 1408 bytes Block 0 is the zero block.
var nfkcIndex = [1408]uint8{...}nfkcIndex: 22 blocks, 1408 entries, 2816 bytes Block 0 is the zero block.
var nfkcIndex = [1408]uint16{...}var nfkcSparse = sparseBlocks{...}nfkcSparseOffset: 158 entries, 316 bytes
var nfkcSparseOffset = []uint16{...}nfkcSparseOffset: 176 entries, 352 bytes
var nfkcSparseOffset = []uint16{...}nfkcSparseOffset: 170 entries, 340 bytes
var nfkcSparseOffset = []uint16{...}nfkcSparseOffset: 155 entries, 310 bytes
var nfkcSparseOffset = []uint16{...}nfkcSparseOffset: 162 entries, 324 bytes
var nfkcSparseOffset = []uint16{...}nfkcSparseOffset: 164 entries, 328 bytes
var nfkcSparseOffset = []uint16{...}nfkcSparseValues: 877 entries, 3508 bytes
var nfkcSparseValues = [877]valueRange{...}nfkcSparseValues: 869 entries, 3476 bytes
var nfkcSparseValues = [869]valueRange{...}nfkcSparseValues: 895 entries, 3580 bytes
var nfkcSparseValues = [895]valueRange{...}nfkcSparseValues: 875 entries, 3500 bytes
var nfkcSparseValues = [875]valueRange{...}nfkcSparseValues: 919 entries, 3676 bytes
var nfkcSparseValues = [919]valueRange{...}nfkcSparseValues: 871 entries, 3484 bytes
var nfkcSparseValues = [871]valueRange{...}nfkcValues: 94 blocks, 6016 entries, 12032 bytes The third block is the zero block.
var nfkcValues = [6016]uint16{...}nfkcValues: 92 blocks, 5888 entries, 11776 bytes The third block is the zero block.
var nfkcValues = [5888]uint16{...}nfkcValues: 94 blocks, 6016 entries, 12032 bytes The third block is the zero block.
var nfkcValues = [6016]uint16{...}nfkcValues: 93 blocks, 5952 entries, 11904 bytes The third block is the zero block.
var nfkcValues = [5952]uint16{...}nfkcValues: 94 blocks, 6016 entries, 12032 bytes The third block is the zero block.
var nfkcValues = [6016]uint16{...}nfkcValues: 97 blocks, 6208 entries, 12416 bytes The third block is the zero block.
var nfkcValues = [6208]uint16{...}const qcInfoMask = 0x3FrecompMap: 7520 bytes (entries only)
var recompMap map[uint32]runerecompMap: 7520 bytes (entries only)
var recompMap map[uint32]runerecompMap: 7520 bytes (entries only)
var recompMap map[uint32]runerecompMap: 7528 bytes (entries only)
var recompMap map[uint32]runerecompMap: 7520 bytes (entries only)
var recompMap map[uint32]runerecompMap: 7528 bytes (entries only)
var recompMap map[uint32]runevar recompMapOnce sync.Oncevar recompMapOnce sync.Oncevar recompMapOnce sync.Oncevar recompMapOnce sync.Oncevar recompMapOnce sync.Oncevar recompMapOnce sync.Onceconst recompMapPacked = *ast.BinaryExprconst recompMapPacked = *ast.BinaryExprconst recompMapPacked = *ast.BinaryExprconst recompMapPacked = *ast.BinaryExprconst recompMapPacked = *ast.BinaryExprconst recompMapPacked = *ast.BinaryExprIndicates a rune caused a segment overflow and a CGJ should be inserted.
const ssOverflowIndicates a rune starts a new segment and should not be added.
const ssStarterIndicates a rune was successfully added to the segment.
const ssSuccess ssState = iotaA Form denotes a canonical representation of Unicode code points. The Unicode-defined normalization and equivalence forms are: NFC Unicode Normalization Form C NFD Unicode Normalization Form D NFKC Unicode Normalization Form KC NFKD Unicode Normalization Form KD For a Form f, this documentation uses the notation f(x) to mean the bytes or string x converted to the given form. A position n in x is called a boundary if conversion to the form can proceed independently on both sides: f(x) == append(f(x[0:n]), f(x[n:])...) References: https://unicode.org/reports/tr15/ and https://unicode.org/notes/tn5/.
type Form intinsertErr is an error code returned by insert. Using this type instead of error improves performance up to 20% for many of the benchmarks.
type insertErr inttype iterFunc func(*Iter) []bytefunctions dispatchable per form
type lookupFunc func(b input, i int) PropertiesWe pack quick check data in 6 bits: 5: Combines forward (0 == false, 1 == true) 4..3: NFC_QC Yes(00), No (10), or Maybe (11) 2: NFD_QC Yes (0) or No (1). No also means there is a decomposition. 1..0: Number of trailing non-starters. When all 6 bits are zero, the character is inert, meaning it is never influenced by normalization.
type qcInfo uint8ssState is used for reporting the segment state after inserting a rune. It is returned by streamSafe.next.
type ssState intstreamSafe implements the policy of when a CGJ should be inserted.
type streamSafe uint8An Iter iterates over a string or byte slice, while normalizing it to a given Form.
type Iter struct {
rb reorderBuffer
buf [maxByteBufferSize]byte
info Properties
next iterFunc
asciiF iterFunc
p int
multiSeg []byte
}Properties provides access to normalization properties of a rune.
type Properties struct {
pos uint8
size uint8
ccc uint8
tccc uint8
nLead uint8
flags qcInfo
index uint16
}formInfo holds Form-specific functions and tables.
type formInfo struct {
form Form
composing bool
compatibility bool
info lookupFunc
nextMain iterFunc
}type input struct {
str string
bytes []byte
}nfcTrie. Total size: 10798 bytes (10.54 KiB). Checksum: b5981cc85e3bd14.
type nfcTrie struct {
}nfcTrie. Total size: 10610 bytes (10.36 KiB). Checksum: 95e8869a9f81e5e6.
type nfcTrie struct {
}nfcTrie. Total size: 10332 bytes (10.09 KiB). Checksum: 51cc525b297fc970.
type nfcTrie struct {
}nfcTrie. Total size: 10442 bytes (10.20 KiB). Checksum: 4ba400a9d8208e03.
type nfcTrie struct {
}nfcTrie. Total size: 10680 bytes (10.43 KiB). Checksum: a555db76d4becdd2.
type nfcTrie struct {
}nfcTrie. Total size: 10586 bytes (10.34 KiB). Checksum: dd926e82067bee11.
type nfcTrie struct {
}nfkcTrie. Total size: 18684 bytes (18.25 KiB). Checksum: 113e23c477adfabd.
type nfkcTrie struct {
}nfkcTrie. Total size: 18768 bytes (18.33 KiB). Checksum: c51186dd2412943d.
type nfkcTrie struct {
}nfkcTrie. Total size: 17248 bytes (16.84 KiB). Checksum: 4fb368372b6b1b27.
type nfkcTrie struct {
}nfkcTrie. Total size: 16994 bytes (16.60 KiB). Checksum: c3ed54ee046f3c46.
type nfkcTrie struct {
}nfkcTrie. Total size: 19260 bytes (18.81 KiB). Checksum: 1a0bbc4c8c24da49.
type nfkcTrie struct {
}nfkcTrie. Total size: 17104 bytes (16.70 KiB). Checksum: d985061cf5307b35.
type nfkcTrie struct {
}type normReader struct {
rb reorderBuffer
r io.Reader
inbuf []byte
outbuf []byte
bufStart int
lastBoundary int
err error
}type normWriter struct {
rb reorderBuffer
w io.Writer
buf []byte
}reorderBuffer is used to normalize a single segment. Characters inserted with insert are decomposed and reordered based on CCC. The compose method can be used to recombine characters. Note that the byte buffer does not hold the UTF-8 characters in order. Only the rune array is maintained in sorted order. flush writes the resulting segment to a byte array.
type reorderBuffer struct {
rune [maxBufferSize]Properties
byte [maxByteBufferSize]byte
nbyte uint8
ss streamSafe
nrune int
f formInfo
src input
nsrc int
tmpBytes input
out []byte
flushF func(*reorderBuffer) bool
}type sparseBlocks struct {
values []valueRange
offset []uint16
}type valueRange struct {
value uint16
lo byte
hi byte
}Append returns f(append(out, src...)). The buffer out must be nil, empty, or equal to f(out).
func (f Form) Append(out []byte, src ...byte) []byteAppendString returns f(append(out, []byte(src)...)). The buffer out must be nil, empty, or equal to f(out).
func (f Form) AppendString(out []byte, src string) []byteBoundaryAfter returns true if runes cannot combine with or otherwise interact with this or previous runes.
func (p Properties) BoundaryAfter() boolBoundaryBefore returns true if this rune starts a new segment and cannot combine with any rune on the left.
func (p Properties) BoundaryBefore() boolBytes returns f(b). May return b if f(b) = b.
func (f Form) Bytes(b []byte) []byteCCC returns the canonical combining class of the underlying rune.
func (p Properties) CCC() uint8Close forces data that remains in the buffer to be written.
func (w *normWriter) Close() errorDecomposition returns the decomposition for the underlying rune or nil if there is none.
func (p Properties) Decomposition() []byteDone returns true if there is no more input to process.
func (i *Iter) Done() boolFirstBoundary returns the position i of the first boundary in b or -1 if b contains no boundary.
func (f Form) FirstBoundary(b []byte) intFirstBoundaryInString returns the position i of the first boundary in s or -1 if s contains no boundary.
func (f Form) FirstBoundaryInString(s string) intInit initializes i to iterate over src after normalizing it to Form f.
func (i *Iter) Init(f Form, src []byte)InitString initializes i to iterate over src after normalizing it to Form f.
func (i *Iter) InitString(f Form, src string)IsNormal returns true if b == f(b).
func (f Form) IsNormal(b []byte) boolIsNormalString returns true if s == f(s).
func (f Form) IsNormalString(s string) boolLastBoundary returns the position i of the last boundary in b or -1 if b contains no boundary.
func (f Form) LastBoundary(b []byte) intLeadCCC returns the CCC of the first rune in the decomposition. If there is no decomposition, LeadCCC equals CCC.
func (p Properties) LeadCCC() uint8Next returns f(i.input[i.Pos():n]), where n is a boundary of i.input. For any input a and b for which f(a) == f(b), subsequent calls to Next will return the same segments. Modifying runes are grouped together with the preceding starter, if such a starter exists. Although not guaranteed, n will typically be the smallest possible n.
func (i *Iter) Next() []byteNextBoundary reports the index of the boundary between the first and next segment in b or -1 if atEOF is false and there are not enough bytes to determine this boundary.
func (f Form) NextBoundary(b []byte, atEOF bool) intNextBoundaryInString reports the index of the boundary between the first and next segment in s or -1 if atEOF is false and there are not enough bytes to determine this boundary.
func (f Form) NextBoundaryInString(s string, atEOF bool) intPos returns the byte position at which the next call to Next will commence processing.
func (i *Iter) Pos() intProperties returns properties for the first rune in s.
func (f Form) Properties(s []byte) PropertiesPropertiesString returns properties for the first rune in s.
func (f Form) PropertiesString(s string) PropertiesQuickSpan returns a boundary n such that b[0:n] == f(b[0:n]). It is not guaranteed to return the largest such n.
func (f Form) QuickSpan(b []byte) intQuickSpanString returns a boundary n such that s[0:n] == f(s[0:n]). It is not guaranteed to return the largest such n.
func (f Form) QuickSpanString(s string) intRead implements the standard read interface.
func (r *normReader) Read(p []byte) (int, error)Reader returns a new reader that implements Read by reading data from r and returning f(data).
func (f Form) Reader(r io.Reader) io.ReaderReset implements the Reset method of the transform.Transformer interface.
func (Form) Reset()Seek sets the segment to be returned by the next call to Next to start at position p. It is the responsibility of the caller to set p to the start of a segment.
func (i *Iter) Seek(offset int64, whence int) (int64, error)Size returns the length of UTF-8 encoding of the rune.
func (p Properties) Size() intSpan implements transform.SpanningTransformer. It returns a boundary n such that b[0:n] == f(b[0:n]). It is not guaranteed to return the largest such n.
func (f Form) Span(b []byte, atEOF bool) (n int, err error)SpanString returns a boundary n such that s[0:n] == f(s[0:n]). It is not guaranteed to return the largest such n.
func (f Form) SpanString(s string, atEOF bool) (n int, err error)String returns f(s).
func (f Form) String(s string) stringTrailCCC returns the CCC of the last rune in the decomposition. If there is no decomposition, TrailCCC equals CCC.
func (p Properties) TrailCCC() uint8Transform implements the Transform method of the transform.Transformer interface. It may need to write segments of up to MaxSegmentSize at once. Users should either catch ErrShortDst and allow dst to grow or have dst be at least of size MaxTransformChunkSize to be guaranteed of progress.
func (f Form) Transform(dst []byte, src []byte, atEOF bool) (nDst int, nSrc int, err error)Write implements the standard write interface. If the last characters are not at a normalization boundary, the bytes will be buffered for the next write. The remaining bytes will be written on close.
func (w *normWriter) Write(data []byte) (n int, err error)Writer returns a new writer that implements Write(b) by writing f(b) to w. The returned writer may use an internal buffer to maintain state across Write calls. Calling its Close method writes any buffered data to w.
func (f Form) Writer(w io.Writer) io.WriteCloserfunc (in *input) _byte(p int) byteappendFlush appends the normalized segment to rb.out.
func appendFlush(rb *reorderBuffer) boolfunc appendQuick(rb *reorderBuffer, i int) intappendRune inserts a rune at the end of the buffer. It is used for Hangul.
func (rb *reorderBuffer) appendRune(r rune)func (in *input) appendSlice(buf []byte, b int, e int) []byteassignRune sets a rune at position pos. It is used for Hangul and recomposition.
func (rb *reorderBuffer) assignRune(pos int, r rune)backwards is used for checking for overflow and segment starts when traversing a string backwards. Users do not need to call first for the first rune. The state of the streamSafe retains the count of the non-starters loaded.
func (ss *streamSafe) backwards(p Properties) ssStatefunc buildRecompMap()bytesAt returns the UTF-8 encoding of the rune at position n. It is used for Hangul and recomposition.
func (rb *reorderBuffer) bytesAt(n int) []bytefunc (in *input) charinfoNFC(p int) (uint16, int)func (in *input) charinfoNFKC(p int) (uint16, int)func cmpNormalBytes(rb *reorderBuffer) boolcombine returns the combined rune or 0 if it doesn't exist. The caller is responsible for calling recompMapOnce.Do(buildRecompMap) sometime before this is called.
func combine(a rune, b rune) runecombineHangul algorithmically combines Jamo character components into Hangul. See https://unicode.org/reports/tr15/#Hangul for details on combining Hangul.
func (rb *reorderBuffer) combineHangul(s int, i int, k int)func (p Properties) combinesBackward() boolfunc (p Properties) combinesForward() boolcompInfo converts the information contained in v and sz to a Properties. See the comment at the top of the file for more information on the format.
func compInfo(v uint16, sz int) Propertiescompose recombines the runes in the buffer. It should only be used to recompose a single segment, as it will not handle alternations between Hangul and non-Hangul characters correctly.
func (rb *reorderBuffer) compose()func (in *input) copySlice(buf []byte, b int, e int) intdecomposeHangul algorithmically decomposes a Hangul rune into its Jamo components. See https://unicode.org/reports/tr15/#Hangul for details on decomposing Hangul.
func (rb *reorderBuffer) decomposeHangul(r rune)decomposeHangul writes the decomposed Hangul to buf and returns the number of bytes written. len(buf) should be at least 9.
func decomposeHangul(buf []byte, r rune) intdecomposeSegment scans the first segment in src into rb. It inserts 0x034f (Grapheme Joiner) when it encounters a sequence of more than 30 non-starters and returns the number of bytes consumed from src or iShortDst or iShortSrc.
func decomposeSegment(rb *reorderBuffer, sp int, atEOF bool) intdecomposeToLastBoundary finds an open segment at the end of the buffer and scans it into rb. Returns the buffer minus the last segment.
func decomposeToLastBoundary(rb *reorderBuffer)func (f Form) doAppend(out []byte, src input, n int) []bytefunc doAppend(rb *reorderBuffer, out []byte, p int) []bytefunc doAppendInner(rb *reorderBuffer, p int) []bytefunc (rb *reorderBuffer) doFlush() boolfunc doNormComposed(i *Iter) []bytefunc doNormDecomposed(i *Iter) []bytefirst inserts the first rune of a segment. It is a faster version of next if it is known p represents the first rune in a segment.
func (ss *streamSafe) first(p Properties)func (f Form) firstBoundary(src input, nsrc int) intflush appends the normalized segment to out and resets rb.
func (rb *reorderBuffer) flush(out []byte) []byteflushCopy copies the normalized segment to buf and resets rb. It returns the number of bytes written to buf.
func (rb *reorderBuffer) flushCopy(buf []byte) intfunc flushTransform(rb *reorderBuffer) boolfunc (in *input) hangul(p int) (r rune)func (p Properties) hasDecomposition() boolfunc (rb *reorderBuffer) init(f Form, src []byte)func (rb *reorderBuffer) initString(f Form, src string)func inputBytes(str []byte) inputfunc inputString(str string) inputinsertCGJ inserts a Combining Grapheme Joiner (0x034f) into rb.
func (rb *reorderBuffer) insertCGJ()insertDecomposed inserts an entry into the reorderBuffer for each rune in dcomp. dcomp must be a sequence of decomposed UTF-8-encoded runes. It flushes the buffer on each new segment start.
func (rb *reorderBuffer) insertDecomposed(dcomp []byte) insertErrinsertFlush inserts the given rune in the buffer ordered by CCC. If a decomposition with multiple segments is encountered, the leading ones are flushed. It returns a non-zero error code if the rune was not inserted.
func (rb *reorderBuffer) insertFlush(src input, i int, info Properties) insertErrinsertOrdered inserts a rune in the buffer, ordered by Canonical Combining Class. It returns false if the buffer is not large enough to hold the rune. It is used internally by insert and insertString only.
func (rb *reorderBuffer) insertOrdered(info Properties)insertSingle inserts an entry in the reorderBuffer for the rune at position i. info is the runeInfo for the rune at position i.
func (rb *reorderBuffer) insertSingle(src input, i int, info Properties)insertUnsafe inserts the given rune in the buffer ordered by CCC. It is assumed there is sufficient space to hold the runes. It is the responsibility of the caller to ensure this. This can be done by checking the state returned by the streamSafe type.
func (rb *reorderBuffer) insertUnsafe(src input, i int, info Properties)func isHangul(b []byte) boolfunc isHangulString(b string) boolfunc isHangulWithoutJamoT(b []byte) boolfunc (p Properties) isInert() boolCaller must ensure len(b) >= 2.
func isJamoVT(b []byte) boolfunc (ss streamSafe) isMax() boolfunc (p Properties) isYesC() boolfunc (p Properties) isYesD() boolfunc lastBoundary(fd *formInfo, b []byte) intlastRuneStart returns the runeInfo and position of the last rune in buf or the zero runeInfo and -1 if no rune was found.
func lastRuneStart(fd *formInfo, buf []byte) (Properties, int)lookup returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfcTrie) lookup(s []byte) (v uint16, sz int)lookup returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfcTrie) lookup(s []byte) (v uint16, sz int)lookup determines the type of block n and looks up the value for b. For n < t.cutoff, the block is a simple lookup table. Otherwise, the block is a list of ranges with an accompanying value. Given a matching range r, the value for b is given by r.value + (b - r.lo) * stride.
func (t *sparseBlocks) lookup(n uint32, b byte) uint16lookup returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfkcTrie) lookup(s []byte) (v uint16, sz int)lookup returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfcTrie) lookup(s []byte) (v uint16, sz int)lookup returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfkcTrie) lookup(s []byte) (v uint16, sz int)lookup returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfcTrie) lookup(s []byte) (v uint16, sz int)lookup returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfcTrie) lookup(s []byte) (v uint16, sz int)lookup returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfkcTrie) lookup(s []byte) (v uint16, sz int)lookup returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfkcTrie) lookup(s []byte) (v uint16, sz int)lookup returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfkcTrie) lookup(s []byte) (v uint16, sz int)lookup returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfkcTrie) lookup(s []byte) (v uint16, sz int)lookup returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfcTrie) lookup(s []byte) (v uint16, sz int)func lookupInfoNFC(b input, i int) Propertiesfunc lookupInfoNFKC(b input, i int) PropertieslookupString returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfcTrie) lookupString(s string) (v uint16, sz int)lookupString returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfkcTrie) lookupString(s string) (v uint16, sz int)lookupString returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfcTrie) lookupString(s string) (v uint16, sz int)lookupString returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfkcTrie) lookupString(s string) (v uint16, sz int)lookupString returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfcTrie) lookupString(s string) (v uint16, sz int)lookupString returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfkcTrie) lookupString(s string) (v uint16, sz int)lookupString returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfkcTrie) lookupString(s string) (v uint16, sz int)lookupString returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfcTrie) lookupString(s string) (v uint16, sz int)lookupString returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfcTrie) lookupString(s string) (v uint16, sz int)lookupString returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfcTrie) lookupString(s string) (v uint16, sz int)lookupString returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfkcTrie) lookupString(s string) (v uint16, sz int)lookupString returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfkcTrie) lookupString(s string) (v uint16, sz int)lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfkcTrie) lookupStringUnsafe(s string) uint16lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfcTrie) lookupStringUnsafe(s string) uint16lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfcTrie) lookupStringUnsafe(s string) uint16lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfcTrie) lookupStringUnsafe(s string) uint16lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfkcTrie) lookupStringUnsafe(s string) uint16lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfkcTrie) lookupStringUnsafe(s string) uint16lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfkcTrie) lookupStringUnsafe(s string) uint16lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfcTrie) lookupStringUnsafe(s string) uint16lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfcTrie) lookupStringUnsafe(s string) uint16lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfkcTrie) lookupStringUnsafe(s string) uint16lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfkcTrie) lookupStringUnsafe(s string) uint16lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfcTrie) lookupStringUnsafe(s string) uint16lookupUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfcTrie) lookupUnsafe(s []byte) uint16lookupUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfkcTrie) lookupUnsafe(s []byte) uint16lookupUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfcTrie) lookupUnsafe(s []byte) uint16lookupUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfkcTrie) lookupUnsafe(s []byte) uint16lookupUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfcTrie) lookupUnsafe(s []byte) uint16lookupUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfcTrie) lookupUnsafe(s []byte) uint16lookupUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfkcTrie) lookupUnsafe(s []byte) uint16lookupUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfkcTrie) lookupUnsafe(s []byte) uint16lookupUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfkcTrie) lookupUnsafe(s []byte) uint16lookupUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfcTrie) lookupUnsafe(s []byte) uint16lookupUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfkcTrie) lookupUnsafe(s []byte) uint16lookupUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfcTrie) lookupUnsafe(s []byte) uint16lookupValue determines the type of block n and looks up the value for b.
func (t *nfcTrie) lookupValue(n uint32, b byte) uint16lookupValue determines the type of block n and looks up the value for b.
func (t *nfcTrie) lookupValue(n uint32, b byte) uint16lookupValue determines the type of block n and looks up the value for b.
func (t *nfcTrie) lookupValue(n uint32, b byte) uint16lookupValue determines the type of block n and looks up the value for b.
func (t *nfcTrie) lookupValue(n uint32, b byte) uint16lookupValue determines the type of block n and looks up the value for b.
func (t *nfkcTrie) lookupValue(n uint32, b byte) uint16lookupValue determines the type of block n and looks up the value for b.
func (t *nfcTrie) lookupValue(n uint32, b byte) uint16lookupValue determines the type of block n and looks up the value for b.
func (t *nfkcTrie) lookupValue(n uint32, b byte) uint16lookupValue determines the type of block n and looks up the value for b.
func (t *nfkcTrie) lookupValue(n uint32, b byte) uint16lookupValue determines the type of block n and looks up the value for b.
func (t *nfkcTrie) lookupValue(n uint32, b byte) uint16lookupValue determines the type of block n and looks up the value for b.
func (t *nfkcTrie) lookupValue(n uint32, b byte) uint16lookupValue determines the type of block n and looks up the value for b.
func (t *nfkcTrie) lookupValue(n uint32, b byte) uint16lookupValue determines the type of block n and looks up the value for b.
func (t *nfcTrie) lookupValue(n uint32, b byte) uint16func (p Properties) multiSegment() boolfunc (p Properties) nLeadingNonStarters() uint8func (p Properties) nTrailingNonStarters() uint8func newNfcTrie(i int) *nfcTriefunc newNfcTrie(i int) *nfcTriefunc newNfcTrie(i int) *nfcTriefunc newNfcTrie(i int) *nfcTriefunc newNfcTrie(i int) *nfcTriefunc newNfcTrie(i int) *nfcTriefunc newNfkcTrie(i int) *nfkcTriefunc newNfkcTrie(i int) *nfkcTriefunc newNfkcTrie(i int) *nfkcTriefunc newNfkcTrie(i int) *nfkcTriefunc newNfkcTrie(i int) *nfkcTriefunc newNfkcTrie(i int) *nfkcTrienext returns an ssState value to indicate whether a rune represented by p can be inserted.
func (ss *streamSafe) next(p Properties) ssStatefunc nextASCIIBytes(i *Iter) []bytefunc nextASCIIString(i *Iter) []bytefunc (f Form) nextBoundary(src input, nsrc int, atEOF bool) intfunc nextCGJCompose(i *Iter) []bytefunc nextCGJDecompose(i *Iter) []bytenextComposed is the implementation of Next for forms NFC and NFKC.
func nextComposed(i *Iter) []bytenextDecomposed is the implementation of Next for forms NFD and NFKD.
func nextDecomposed(i *Iter) (next []byte)func nextDone(i *Iter) []bytefunc nextHangul(i *Iter) []bytenextMulti is used for iterating over multi-segment decompositions for decomposing normal forms.
func nextMulti(i *Iter) []bytenextMultiNorm is used for iterating over multi-segment decompositions for composing normal forms.
func nextMultiNorm(i *Iter) []bytepatchTail fixes a case where a rune may be incorrectly normalized if it is followed by illegal continuation bytes. It reports whether the decomposition is still in progress.
func patchTail(rb *reorderBuffer) boolquickSpan returns a boundary n such that src[0:n] == f(src[0:n]) and whether any non-normalized parts were found. If atEOF is false, n will not point past the last segment if this segment might become non-normalized by appending other runes.
func (f *formInfo) quickSpan(src input, i int, end int, atEOF bool) (n int, ok bool)reset discards all characters from the buffer.
func (rb *reorderBuffer) reset()returnSlice returns a slice of the underlying input type as a byte slice. If the underlying is of type []byte, it will simply return a slice. If the underlying is of type string, it will copy the slice to the buffer and return that.
func (i *Iter) returnSlice(a int, b int) []byteruneAt returns the rune at position n. It is used for Hangul and recomposition.
func (rb *reorderBuffer) runeAt(n int) runefunc (in *input) setBytes(str []byte)func (i *Iter) setDone()func (rb *reorderBuffer) setFlusher(out []byte, f func(*reorderBuffer) bool)func (in *input) setString(str string)func (in *input) skipASCII(p int, max int) intfunc (in *input) skipContinuationBytes(p int) inttransform implements the transform.Transformer interface. It is only called when quickSpan does not pass for a given string.
func (f Form) transform(dst []byte, src []byte, atEOF bool) (nDst int, nSrc int, err error)Generated with Arrow