Imports #
"unicode/utf8"
"golang.org/x/text/transform"
"sync"
"sync"
"sync"
"sync"
"unicode/utf8"
"golang.org/x/text/transform"
"encoding/binary"
"fmt"
"unicode/utf8"
"io"
"sync"
"sync"
"unicode/utf8"
"unicode/utf8"
"unicode/utf8"
"golang.org/x/text/transform"
"sync"
"sync"
"sync"
"sync"
"unicode/utf8"
"golang.org/x/text/transform"
"encoding/binary"
"fmt"
"unicode/utf8"
"io"
"sync"
"sync"
"unicode/utf8"
"unicode/utf8"
GraphemeJoiner is inserted after maxNonStarters non-starter runes.
const GraphemeJoiner = "\u034F"
MaxSegmentSize is the maximum size of a byte buffer needed to consider any sequence of starter and non-starter runes for the purpose of normalization.
const MaxSegmentSize = maxByteBufferSize
MaxTransformChunkSize indicates the maximum number of bytes that Transform may need to write atomically for any Form. Making a destination buffer at least this size ensures that Transform can always make progress and that the user does not need to grow the buffer on an ErrShortDst.
const MaxTransformChunkSize = *ast.BinaryExpr
MaxTransformChunkSize indicates the maximum number of bytes that Transform may need to write atomically for any Form. Making a destination buffer at least this size ensures that Transform can always make progress and that the user does not need to grow the buffer on an ErrShortDst.
const MaxTransformChunkSize = *ast.BinaryExpr
MaxTransformChunkSize indicates the maximum number of bytes that Transform may need to write atomically for any Form. Making a destination buffer at least this size ensures that Transform can always make progress and that the user does not need to grow the buffer on an ErrShortDst.
const MaxTransformChunkSize = *ast.BinaryExpr
MaxTransformChunkSize indicates the maximum number of bytes that Transform may need to write atomically for any Form. Making a destination buffer at least this size ensures that Transform can always make progress and that the user does not need to grow the buffer on an ErrShortDst.
const MaxTransformChunkSize = *ast.BinaryExpr
MaxTransformChunkSize indicates the maximum number of bytes that Transform may need to write atomically for any Form. Making a destination buffer at least this size ensures that Transform can always make progress and that the user does not need to grow the buffer on an ErrShortDst.
const MaxTransformChunkSize = *ast.BinaryExpr
MaxTransformChunkSize indicates the maximum number of bytes that Transform may need to write atomically for any Form. Making a destination buffer at least this size ensures that Transform can always make progress and that the user does not need to grow the buffer on an ErrShortDst.
const MaxTransformChunkSize = *ast.BinaryExpr
const NFC Form = iota
const NFD
const NFKC
const NFKD
Version is the Unicode edition from which the tables are derived.
const Version = "12.0.0"
Version is the Unicode edition from which the tables are derived.
const Version = "11.0.0"
Version is the Unicode edition from which the tables are derived.
const Version = "10.0.0"
Version is the Unicode edition from which the tables are derived.
const Version = "13.0.0"
Version is the Unicode edition from which the tables are derived.
const Version = "15.0.0"
Version is the Unicode edition from which the tables are derived.
const Version = "9.0.0"
var ccc = [55]uint8{...}
var ccc = [56]uint8{...}
var ccc = [55]uint8{...}
var ccc = [55]uint8{...}
var ccc = [56]uint8{...}
var ccc = [55]uint8{...}
decomps: 19128 bytes
var decomps = [...]byte{...}
decomps: 19105 bytes
var decomps = [...]byte{...}
decomps: 19426 bytes
var decomps = [...]byte{...}
decomps: 19105 bytes
var decomps = [...]byte{...}
decomps: 19108 bytes
var decomps = [...]byte{...}
decomps: 19105 bytes
var decomps = [...]byte{...}
const endMulti = 0x2F77
const endMulti = 0x2F60
const endMulti = 0x2F60
const endMulti = 0x30A1
const endMulti = 0x2F63
const endMulti = 0x2F60
var errs = []error{...}
const firstCCC = 0x2C9E
const firstCCC = 0x2DD5
const firstCCC = 0x2C9E
const firstCCC = 0x2CAB
const firstCCC = 0x2CA1
const firstCCC = 0x2C9E
const firstCCCZeroExcept = 0x4A78
const firstCCCZeroExcept = 0x4A78
const firstCCCZeroExcept = 0x4A7B
const firstCCCZeroExcept = 0x4A78
const firstCCCZeroExcept = 0x4A8F
const firstCCCZeroExcept = 0x4BB9
const firstLeadingCCC = 0x49AE
const firstLeadingCCC = 0x49AE
const firstLeadingCCC = 0x4AEF
const firstLeadingCCC = 0x49AE
const firstLeadingCCC = 0x49C5
const firstLeadingCCC = 0x49B1
const firstMulti = 0x199A
const firstMulti = 0x186D
const firstMulti = 0x186D
const firstMulti = 0x1870
const firstMulti = 0x186D
const firstMulti = 0x186D
const firstStarterWithNLead = 0x4A9F
const firstStarterWithNLead = 0x4A9F
const firstStarterWithNLead = 0x4BE0
const firstStarterWithNLead = 0x4A9F
const firstStarterWithNLead = 0x4AB6
const firstStarterWithNLead = 0x4AA2
var formTable = []*formInfo{...}
For Hangul we combine algorithmically, instead of using tables.
const hangulBase = 0xAC00
For Hangul we combine algorithmically, instead of using tables.
const hangulBase0 = 0xEA
For Hangul we combine algorithmically, instead of using tables.
const hangulBase1 = 0xB0
For Hangul we combine algorithmically, instead of using tables.
const hangulBase2 = 0x80
For Hangul we combine algorithmically, instead of using tables.
const hangulEnd = *ast.BinaryExpr
For Hangul we combine algorithmically, instead of using tables.
const hangulEnd0 = 0xED
For Hangul we combine algorithmically, instead of using tables.
const hangulEnd1 = 0x9E
For Hangul we combine algorithmically, instead of using tables.
const hangulEnd2 = 0xA4
const hangulUTF8Size = 3
const headerFlagsMask = 0xC0
const headerLenMask = 0x3F
const iShortDst
const iShortSrc
const iSuccess insertErr = *ast.UnaryExpr
For Hangul we combine algorithmically, instead of using tables.
const jamoLBase = 0x1100
For Hangul we combine algorithmically, instead of using tables.
const jamoLBase0 = 0xE1
For Hangul we combine algorithmically, instead of using tables.
const jamoLBase1 = 0x84
For Hangul we combine algorithmically, instead of using tables.
const jamoLEnd = 0x1113
For Hangul we combine algorithmically, instead of using tables.
const jamoLVTCount = *ast.BinaryExpr
For Hangul we combine algorithmically, instead of using tables.
const jamoTBase = 0x11A7
For Hangul we combine algorithmically, instead of using tables.
const jamoTCount = 28
For Hangul we combine algorithmically, instead of using tables.
const jamoTEnd = 0x11C3
For Hangul we combine algorithmically, instead of using tables.
const jamoVBase = 0x1161
For Hangul we combine algorithmically, instead of using tables.
const jamoVCount = 21
For Hangul we combine algorithmically, instead of using tables.
const jamoVEnd = 0x1176
For Hangul we combine algorithmically, instead of using tables.
const jamoVTCount = *ast.BinaryExpr
const lastDecomp = 0x4AA4
const lastDecomp = 0x4AA1
const lastDecomp = 0x4AA1
const lastDecomp = 0x4AB8
const lastDecomp = 0x4BE2
const lastDecomp = 0x4AA1
The maximum number of characters needed for a buffer is maxNonStarters + 1 for the starter + 1 for the CGJ.
const maxBufferSize = *ast.BinaryExpr
const maxByteBufferSize = *ast.BinaryExpr
const maxDecomp = 0x8000
const maxDecomp = 0x8000
const maxDecomp = 0x8000
const maxDecomp = 0x8000
const maxDecomp = 0x8000
const maxDecomp = 0x8000
const maxNFCExpansion = 3
const maxNFKCExpansion = 18
const maxNonStarters = 30
var nfcData = *ast.CallExpr
nfcIndex: 22 blocks, 1408 entries, 1408 bytes Block 0 is the zero block.
var nfcIndex = [1408]uint8{...}
nfcIndex: 22 blocks, 1408 entries, 1408 bytes Block 0 is the zero block.
var nfcIndex = [1408]uint8{...}
nfcIndex: 22 blocks, 1408 entries, 1408 bytes Block 0 is the zero block.
var nfcIndex = [1408]uint8{...}
nfcIndex: 22 blocks, 1408 entries, 1408 bytes Block 0 is the zero block.
var nfcIndex = [1408]uint8{...}
nfcIndex: 22 blocks, 1408 entries, 1408 bytes Block 0 is the zero block.
var nfcIndex = [1408]uint8{...}
nfcIndex: 22 blocks, 1408 entries, 1408 bytes Block 0 is the zero block.
var nfcIndex = [1408]uint8{...}
var nfcSparse = sparseBlocks{...}
nfcSparseOffset: 151 entries, 302 bytes
var nfcSparseOffset = []uint16{...}
nfcSparseOffset: 142 entries, 284 bytes
var nfcSparseOffset = []uint16{...}
nfcSparseOffset: 163 entries, 326 bytes
var nfcSparseOffset = []uint16{...}
nfcSparseOffset: 149 entries, 298 bytes
var nfcSparseOffset = []uint16{...}
nfcSparseOffset: 156 entries, 312 bytes
var nfcSparseOffset = []uint16{...}
nfcSparseOffset: 145 entries, 290 bytes
var nfcSparseOffset = []uint16{...}
nfcSparseValues: 684 entries, 2736 bytes
var nfcSparseValues = [684]valueRange{...}
nfcSparseValues: 689 entries, 2756 bytes
var nfcSparseValues = [689]valueRange{...}
nfcSparseValues: 688 entries, 2752 bytes
var nfcSparseValues = [688]valueRange{...}
nfcSparseValues: 730 entries, 2920 bytes
var nfcSparseValues = [730]valueRange{...}
nfcSparseValues: 704 entries, 2816 bytes
var nfcSparseValues = [704]valueRange{...}
nfcSparseValues: 682 entries, 2728 bytes
var nfcSparseValues = [682]valueRange{...}
nfcValues: 48 blocks, 3072 entries, 6144 bytes The third block is the zero block.
var nfcValues = [3072]uint16{...}
nfcValues: 46 blocks, 2944 entries, 5888 bytes The third block is the zero block.
var nfcValues = [2944]uint16{...}
nfcValues: 47 blocks, 3008 entries, 6016 bytes The third block is the zero block.
var nfcValues = [3008]uint16{...}
nfcValues: 48 blocks, 3072 entries, 6144 bytes The third block is the zero block.
var nfcValues = [3072]uint16{...}
nfcValues: 48 blocks, 3072 entries, 6144 bytes The third block is the zero block.
var nfcValues = [3072]uint16{...}
nfcValues: 48 blocks, 3072 entries, 6144 bytes The third block is the zero block.
var nfcValues = [3072]uint16{...}
var nfkcData = *ast.CallExpr
nfkcIndex: 22 blocks, 1408 entries, 1408 bytes Block 0 is the zero block.
var nfkcIndex = [1408]uint8{...}
nfkcIndex: 22 blocks, 1408 entries, 2816 bytes Block 0 is the zero block.
var nfkcIndex = [1408]uint16{...}
nfkcIndex: 22 blocks, 1408 entries, 1408 bytes Block 0 is the zero block.
var nfkcIndex = [1408]uint8{...}
nfkcIndex: 22 blocks, 1408 entries, 2816 bytes Block 0 is the zero block.
var nfkcIndex = [1408]uint16{...}
nfkcIndex: 22 blocks, 1408 entries, 1408 bytes Block 0 is the zero block.
var nfkcIndex = [1408]uint8{...}
nfkcIndex: 22 blocks, 1408 entries, 2816 bytes Block 0 is the zero block.
var nfkcIndex = [1408]uint16{...}
var nfkcSparse = sparseBlocks{...}
nfkcSparseOffset: 158 entries, 316 bytes
var nfkcSparseOffset = []uint16{...}
nfkcSparseOffset: 176 entries, 352 bytes
var nfkcSparseOffset = []uint16{...}
nfkcSparseOffset: 170 entries, 340 bytes
var nfkcSparseOffset = []uint16{...}
nfkcSparseOffset: 155 entries, 310 bytes
var nfkcSparseOffset = []uint16{...}
nfkcSparseOffset: 162 entries, 324 bytes
var nfkcSparseOffset = []uint16{...}
nfkcSparseOffset: 164 entries, 328 bytes
var nfkcSparseOffset = []uint16{...}
nfkcSparseValues: 877 entries, 3508 bytes
var nfkcSparseValues = [877]valueRange{...}
nfkcSparseValues: 869 entries, 3476 bytes
var nfkcSparseValues = [869]valueRange{...}
nfkcSparseValues: 895 entries, 3580 bytes
var nfkcSparseValues = [895]valueRange{...}
nfkcSparseValues: 875 entries, 3500 bytes
var nfkcSparseValues = [875]valueRange{...}
nfkcSparseValues: 919 entries, 3676 bytes
var nfkcSparseValues = [919]valueRange{...}
nfkcSparseValues: 871 entries, 3484 bytes
var nfkcSparseValues = [871]valueRange{...}
nfkcValues: 94 blocks, 6016 entries, 12032 bytes The third block is the zero block.
var nfkcValues = [6016]uint16{...}
nfkcValues: 92 blocks, 5888 entries, 11776 bytes The third block is the zero block.
var nfkcValues = [5888]uint16{...}
nfkcValues: 94 blocks, 6016 entries, 12032 bytes The third block is the zero block.
var nfkcValues = [6016]uint16{...}
nfkcValues: 93 blocks, 5952 entries, 11904 bytes The third block is the zero block.
var nfkcValues = [5952]uint16{...}
nfkcValues: 94 blocks, 6016 entries, 12032 bytes The third block is the zero block.
var nfkcValues = [6016]uint16{...}
nfkcValues: 97 blocks, 6208 entries, 12416 bytes The third block is the zero block.
var nfkcValues = [6208]uint16{...}
const qcInfoMask = 0x3F
recompMap: 7520 bytes (entries only)
var recompMap map[uint32]rune
recompMap: 7520 bytes (entries only)
var recompMap map[uint32]rune
recompMap: 7520 bytes (entries only)
var recompMap map[uint32]rune
recompMap: 7528 bytes (entries only)
var recompMap map[uint32]rune
recompMap: 7520 bytes (entries only)
var recompMap map[uint32]rune
recompMap: 7528 bytes (entries only)
var recompMap map[uint32]rune
var recompMapOnce sync.Once
var recompMapOnce sync.Once
var recompMapOnce sync.Once
var recompMapOnce sync.Once
var recompMapOnce sync.Once
var recompMapOnce sync.Once
const recompMapPacked = *ast.BinaryExpr
const recompMapPacked = *ast.BinaryExpr
const recompMapPacked = *ast.BinaryExpr
const recompMapPacked = *ast.BinaryExpr
const recompMapPacked = *ast.BinaryExpr
const recompMapPacked = *ast.BinaryExpr
Indicates a rune caused a segment overflow and a CGJ should be inserted.
const ssOverflow
Indicates a rune starts a new segment and should not be added.
const ssStarter
Indicates a rune was successfully added to the segment.
const ssSuccess ssState = iota
A Form denotes a canonical representation of Unicode code points. The Unicode-defined normalization and equivalence forms are: NFC Unicode Normalization Form C NFD Unicode Normalization Form D NFKC Unicode Normalization Form KC NFKD Unicode Normalization Form KD For a Form f, this documentation uses the notation f(x) to mean the bytes or string x converted to the given form. A position n in x is called a boundary if conversion to the form can proceed independently on both sides: f(x) == append(f(x[0:n]), f(x[n:])...) References: https://unicode.org/reports/tr15/ and https://unicode.org/notes/tn5/.
type Form int
insertErr is an error code returned by insert. Using this type instead of error improves performance up to 20% for many of the benchmarks.
type insertErr int
type iterFunc func(*Iter) []byte
functions dispatchable per form
type lookupFunc func(b input, i int) Properties
We pack quick check data in 6 bits: 5: Combines forward (0 == false, 1 == true) 4..3: NFC_QC Yes (00), No (10), or Maybe (11) 2: NFD_QC Yes (0) or No (1). No also means there is a decomposition. 1..0: Number of trailing non-starters. When all 6 bits are zero, the character is inert, meaning it is never influenced by normalization.
type qcInfo uint8
ssState is used for reporting the segment state after inserting a rune. It is returned by streamSafe.next.
type ssState int
streamSafe implements the policy of when a CGJ should be inserted.
type streamSafe uint8
An Iter iterates over a string or byte slice, while normalizing it to a given Form.
type Iter struct {
rb reorderBuffer
buf [maxByteBufferSize]byte
info Properties
next iterFunc
asciiF iterFunc
p int
multiSeg []byte
}
Properties provides access to normalization properties of a rune.
type Properties struct {
pos uint8
size uint8
ccc uint8
tccc uint8
nLead uint8
flags qcInfo
index uint16
}
formInfo holds Form-specific functions and tables.
type formInfo struct {
form Form
composing bool
compatibility bool
info lookupFunc
nextMain iterFunc
}
type input struct {
str string
bytes []byte
}
nfcTrie. Total size: 10798 bytes (10.54 KiB). Checksum: b5981cc85e3bd14.
type nfcTrie struct {
}
nfcTrie. Total size: 10610 bytes (10.36 KiB). Checksum: 95e8869a9f81e5e6.
type nfcTrie struct {
}
nfcTrie. Total size: 10332 bytes (10.09 KiB). Checksum: 51cc525b297fc970.
type nfcTrie struct {
}
nfcTrie. Total size: 10442 bytes (10.20 KiB). Checksum: 4ba400a9d8208e03.
type nfcTrie struct {
}
nfcTrie. Total size: 10680 bytes (10.43 KiB). Checksum: a555db76d4becdd2.
type nfcTrie struct {
}
nfcTrie. Total size: 10586 bytes (10.34 KiB). Checksum: dd926e82067bee11.
type nfcTrie struct {
}
nfkcTrie. Total size: 18684 bytes (18.25 KiB). Checksum: 113e23c477adfabd.
type nfkcTrie struct {
}
nfkcTrie. Total size: 18768 bytes (18.33 KiB). Checksum: c51186dd2412943d.
type nfkcTrie struct {
}
nfkcTrie. Total size: 17248 bytes (16.84 KiB). Checksum: 4fb368372b6b1b27.
type nfkcTrie struct {
}
nfkcTrie. Total size: 16994 bytes (16.60 KiB). Checksum: c3ed54ee046f3c46.
type nfkcTrie struct {
}
nfkcTrie. Total size: 19260 bytes (18.81 KiB). Checksum: 1a0bbc4c8c24da49.
type nfkcTrie struct {
}
nfkcTrie. Total size: 17104 bytes (16.70 KiB). Checksum: d985061cf5307b35.
type nfkcTrie struct {
}
type normReader struct {
rb reorderBuffer
r io.Reader
inbuf []byte
outbuf []byte
bufStart int
lastBoundary int
err error
}
type normWriter struct {
rb reorderBuffer
w io.Writer
buf []byte
}
reorderBuffer is used to normalize a single segment. Characters inserted with insert are decomposed and reordered based on CCC. The compose method can be used to recombine characters. Note that the byte buffer does not hold the UTF-8 characters in order. Only the rune array is maintained in sorted order. flush writes the resulting segment to a byte array.
type reorderBuffer struct {
rune [maxBufferSize]Properties
byte [maxByteBufferSize]byte
nbyte uint8
ss streamSafe
nrune int
f formInfo
src input
nsrc int
tmpBytes input
out []byte
flushF func(*reorderBuffer) bool
}
type sparseBlocks struct {
values []valueRange
offset []uint16
}
type valueRange struct {
value uint16
lo byte
hi byte
}
Append returns f(append(out, src...)). The buffer out must be nil, empty, or equal to f(out).
func (f Form) Append(out []byte, src ...byte) []byte
AppendString returns f(append(out, []byte(src)...)). The buffer out must be nil, empty, or equal to f(out).
func (f Form) AppendString(out []byte, src string) []byte
BoundaryAfter returns true if runes cannot combine with or otherwise interact with this or previous runes.
func (p Properties) BoundaryAfter() bool
BoundaryBefore returns true if this rune starts a new segment and cannot combine with any rune on the left.
func (p Properties) BoundaryBefore() bool
Bytes returns f(b). May return b if f(b) = b.
func (f Form) Bytes(b []byte) []byte
CCC returns the canonical combining class of the underlying rune.
func (p Properties) CCC() uint8
Close forces data that remains in the buffer to be written.
func (w *normWriter) Close() error
Decomposition returns the decomposition for the underlying rune or nil if there is none.
func (p Properties) Decomposition() []byte
Done returns true if there is no more input to process.
func (i *Iter) Done() bool
FirstBoundary returns the position i of the first boundary in b or -1 if b contains no boundary.
func (f Form) FirstBoundary(b []byte) int
FirstBoundaryInString returns the position i of the first boundary in s or -1 if s contains no boundary.
func (f Form) FirstBoundaryInString(s string) int
Init initializes i to iterate over src after normalizing it to Form f.
func (i *Iter) Init(f Form, src []byte)
InitString initializes i to iterate over src after normalizing it to Form f.
func (i *Iter) InitString(f Form, src string)
IsNormal returns true if b == f(b).
func (f Form) IsNormal(b []byte) bool
IsNormalString returns true if s == f(s).
func (f Form) IsNormalString(s string) bool
LastBoundary returns the position i of the last boundary in b or -1 if b contains no boundary.
func (f Form) LastBoundary(b []byte) int
LeadCCC returns the CCC of the first rune in the decomposition. If there is no decomposition, LeadCCC equals CCC.
func (p Properties) LeadCCC() uint8
Next returns f(i.input[i.Pos():n]), where n is a boundary of i.input. For any input a and b for which f(a) == f(b), subsequent calls to Next will return the same segments. Modifying runes are grouped together with the preceding starter, if such a starter exists. Although not guaranteed, n will typically be the smallest possible n.
func (i *Iter) Next() []byte
NextBoundary reports the index of the boundary between the first and next segment in b or -1 if atEOF is false and there are not enough bytes to determine this boundary.
func (f Form) NextBoundary(b []byte, atEOF bool) int
NextBoundaryInString reports the index of the boundary between the first and next segment in s or -1 if atEOF is false and there are not enough bytes to determine this boundary.
func (f Form) NextBoundaryInString(s string, atEOF bool) int
Pos returns the byte position at which the next call to Next will commence processing.
func (i *Iter) Pos() int
Properties returns properties for the first rune in s.
func (f Form) Properties(s []byte) Properties
PropertiesString returns properties for the first rune in s.
func (f Form) PropertiesString(s string) Properties
QuickSpan returns a boundary n such that b[0:n] == f(b[0:n]). It is not guaranteed to return the largest such n.
func (f Form) QuickSpan(b []byte) int
QuickSpanString returns a boundary n such that s[0:n] == f(s[0:n]). It is not guaranteed to return the largest such n.
func (f Form) QuickSpanString(s string) int
Read implements the standard read interface.
func (r *normReader) Read(p []byte) (int, error)
Reader returns a new reader that implements Read by reading data from r and returning f(data).
func (f Form) Reader(r io.Reader) io.Reader
Reset implements the Reset method of the transform.Transformer interface.
func (Form) Reset()
Seek sets the segment to be returned by the next call to Next to start at position p. It is the responsibility of the caller to set p to the start of a segment.
func (i *Iter) Seek(offset int64, whence int) (int64, error)
Size returns the length of UTF-8 encoding of the rune.
func (p Properties) Size() int
Span implements transform.SpanningTransformer. It returns a boundary n such that b[0:n] == f(b[0:n]). It is not guaranteed to return the largest such n.
func (f Form) Span(b []byte, atEOF bool) (n int, err error)
SpanString returns a boundary n such that s[0:n] == f(s[0:n]). It is not guaranteed to return the largest such n.
func (f Form) SpanString(s string, atEOF bool) (n int, err error)
String returns f(s).
func (f Form) String(s string) string
TrailCCC returns the CCC of the last rune in the decomposition. If there is no decomposition, TrailCCC equals CCC.
func (p Properties) TrailCCC() uint8
Transform implements the Transform method of the transform.Transformer interface. It may need to write segments of up to MaxSegmentSize at once. Users should either catch ErrShortDst and allow dst to grow or have dst be at least of size MaxTransformChunkSize to be guaranteed of progress.
func (f Form) Transform(dst []byte, src []byte, atEOF bool) (nDst int, nSrc int, err error)
Write implements the standard write interface. If the last characters are not at a normalization boundary, the bytes will be buffered for the next write. The remaining bytes will be written on close.
func (w *normWriter) Write(data []byte) (n int, err error)
Writer returns a new writer that implements Write(b) by writing f(b) to w. The returned writer may use an internal buffer to maintain state across Write calls. Calling its Close method writes any buffered data to w.
func (f Form) Writer(w io.Writer) io.WriteCloser
func (in *input) _byte(p int) byte
appendFlush appends the normalized segment to rb.out.
func appendFlush(rb *reorderBuffer) bool
func appendQuick(rb *reorderBuffer, i int) int
appendRune inserts a rune at the end of the buffer. It is used for Hangul.
func (rb *reorderBuffer) appendRune(r rune)
func (in *input) appendSlice(buf []byte, b int, e int) []byte
assignRune sets a rune at position pos. It is used for Hangul and recomposition.
func (rb *reorderBuffer) assignRune(pos int, r rune)
backwards is used for checking for overflow and segment starts when traversing a string backwards. Users do not need to call first for the first rune. The state of the streamSafe retains the count of the non-starters loaded.
func (ss *streamSafe) backwards(p Properties) ssState
func buildRecompMap()
bytesAt returns the UTF-8 encoding of the rune at position n. It is used for Hangul and recomposition.
func (rb *reorderBuffer) bytesAt(n int) []byte
func (in *input) charinfoNFC(p int) (uint16, int)
func (in *input) charinfoNFKC(p int) (uint16, int)
func cmpNormalBytes(rb *reorderBuffer) bool
combine returns the combined rune or 0 if it doesn't exist. The caller is responsible for calling recompMapOnce.Do(buildRecompMap) sometime before this is called.
func combine(a rune, b rune) rune
combineHangul algorithmically combines Jamo character components into Hangul. See https://unicode.org/reports/tr15/#Hangul for details on combining Hangul.
func (rb *reorderBuffer) combineHangul(s int, i int, k int)
func (p Properties) combinesBackward() bool
func (p Properties) combinesForward() bool
compInfo converts the information contained in v and sz to a Properties. See the comment at the top of the file for more information on the format.
func compInfo(v uint16, sz int) Properties
compose recombines the runes in the buffer. It should only be used to recompose a single segment, as it will not handle alternations between Hangul and non-Hangul characters correctly.
func (rb *reorderBuffer) compose()
func (in *input) copySlice(buf []byte, b int, e int) int
decomposeHangul algorithmically decomposes a Hangul rune into its Jamo components. See https://unicode.org/reports/tr15/#Hangul for details on decomposing Hangul.
func (rb *reorderBuffer) decomposeHangul(r rune)
decomposeHangul writes the decomposed Hangul to buf and returns the number of bytes written. len(buf) should be at least 9.
func decomposeHangul(buf []byte, r rune) int
decomposeSegment scans the first segment in src into rb. It inserts 0x034f (Grapheme Joiner) when it encounters a sequence of more than 30 non-starters and returns the number of bytes consumed from src or iShortDst or iShortSrc.
func decomposeSegment(rb *reorderBuffer, sp int, atEOF bool) int
decomposeToLastBoundary finds an open segment at the end of the buffer and scans it into rb. Returns the buffer minus the last segment.
func decomposeToLastBoundary(rb *reorderBuffer)
func (f Form) doAppend(out []byte, src input, n int) []byte
func doAppend(rb *reorderBuffer, out []byte, p int) []byte
func doAppendInner(rb *reorderBuffer, p int) []byte
func (rb *reorderBuffer) doFlush() bool
func doNormComposed(i *Iter) []byte
func doNormDecomposed(i *Iter) []byte
first inserts the first rune of a segment. It is a faster version of next if it is known p represents the first rune in a segment.
func (ss *streamSafe) first(p Properties)
func (f Form) firstBoundary(src input, nsrc int) int
flush appends the normalized segment to out and resets rb.
func (rb *reorderBuffer) flush(out []byte) []byte
flushCopy copies the normalized segment to buf and resets rb. It returns the number of bytes written to buf.
func (rb *reorderBuffer) flushCopy(buf []byte) int
func flushTransform(rb *reorderBuffer) bool
func (in *input) hangul(p int) (r rune)
func (p Properties) hasDecomposition() bool
func (rb *reorderBuffer) init(f Form, src []byte)
func (rb *reorderBuffer) initString(f Form, src string)
func inputBytes(str []byte) input
func inputString(str string) input
insertCGJ inserts a Combining Grapheme Joiner (0x034f) into rb.
func (rb *reorderBuffer) insertCGJ()
insertDecomposed inserts an entry into the reorderBuffer for each rune in dcomp. dcomp must be a sequence of decomposed UTF-8-encoded runes. It flushes the buffer on each new segment start.
func (rb *reorderBuffer) insertDecomposed(dcomp []byte) insertErr
insertFlush inserts the given rune in the buffer ordered by CCC. If a decomposition with multiple segments is encountered, the leading ones are flushed. It returns a non-zero error code if the rune was not inserted.
func (rb *reorderBuffer) insertFlush(src input, i int, info Properties) insertErr
insertOrdered inserts a rune in the buffer, ordered by Canonical Combining Class. It has no return value, so it does not report whether the buffer is large enough to hold the rune; presumably the caller must ensure there is room (TODO confirm). It is used internally by insert and insertString only.
func (rb *reorderBuffer) insertOrdered(info Properties)
insertSingle inserts an entry in the reorderBuffer for the rune at position i. info is the Properties for the rune at position i.
func (rb *reorderBuffer) insertSingle(src input, i int, info Properties)
insertUnsafe inserts the given rune in the buffer ordered by CCC. It is assumed there is sufficient space to hold the runes. It is the responsibility of the caller to ensure this. This can be done by checking the state returned by the streamSafe type.
func (rb *reorderBuffer) insertUnsafe(src input, i int, info Properties)
func isHangul(b []byte) bool
func isHangulString(b string) bool
func isHangulWithoutJamoT(b []byte) bool
func (p Properties) isInert() bool
Caller must ensure len(b) >= 2.
func isJamoVT(b []byte) bool
func (ss streamSafe) isMax() bool
func (p Properties) isYesC() bool
func (p Properties) isYesD() bool
func lastBoundary(fd *formInfo, b []byte) int
lastRuneStart returns the Properties and position of the last rune in buf or the zero Properties and -1 if no rune was found.
func lastRuneStart(fd *formInfo, buf []byte) (Properties, int)
lookup returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfcTrie) lookup(s []byte) (v uint16, sz int)
lookup returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfcTrie) lookup(s []byte) (v uint16, sz int)
lookup determines the type of block n and looks up the value for b. For n < t.cutoff, the block is a simple lookup table. Otherwise, the block is a list of ranges with an accompanying value. Given a matching range r, the value for b is given by r.value + (b - r.lo) * stride.
func (t *sparseBlocks) lookup(n uint32, b byte) uint16
lookup returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfkcTrie) lookup(s []byte) (v uint16, sz int)
lookup returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfcTrie) lookup(s []byte) (v uint16, sz int)
lookup returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfkcTrie) lookup(s []byte) (v uint16, sz int)
lookup returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfcTrie) lookup(s []byte) (v uint16, sz int)
lookup returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfcTrie) lookup(s []byte) (v uint16, sz int)
lookup returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfkcTrie) lookup(s []byte) (v uint16, sz int)
lookup returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfkcTrie) lookup(s []byte) (v uint16, sz int)
lookup returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfkcTrie) lookup(s []byte) (v uint16, sz int)
lookup returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfkcTrie) lookup(s []byte) (v uint16, sz int)
lookup returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfcTrie) lookup(s []byte) (v uint16, sz int)
func lookupInfoNFC(b input, i int) Properties
func lookupInfoNFKC(b input, i int) Properties
lookupString returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfcTrie) lookupString(s string) (v uint16, sz int)
lookupString returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfkcTrie) lookupString(s string) (v uint16, sz int)
lookupString returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfcTrie) lookupString(s string) (v uint16, sz int)
lookupString returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfkcTrie) lookupString(s string) (v uint16, sz int)
lookupString returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfcTrie) lookupString(s string) (v uint16, sz int)
lookupString returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfkcTrie) lookupString(s string) (v uint16, sz int)
lookupString returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfkcTrie) lookupString(s string) (v uint16, sz int)
lookupString returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfcTrie) lookupString(s string) (v uint16, sz int)
lookupString returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfcTrie) lookupString(s string) (v uint16, sz int)
lookupString returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfcTrie) lookupString(s string) (v uint16, sz int)
lookupString returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfkcTrie) lookupString(s string) (v uint16, sz int)
lookupString returns the trie value for the first UTF-8 encoding in s and the width in bytes of this encoding. The size will be 0 if s does not hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *nfkcTrie) lookupString(s string) (v uint16, sz int)
lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfkcTrie) lookupStringUnsafe(s string) uint16
lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfcTrie) lookupStringUnsafe(s string) uint16
lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfcTrie) lookupStringUnsafe(s string) uint16
lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfcTrie) lookupStringUnsafe(s string) uint16
lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfkcTrie) lookupStringUnsafe(s string) uint16
lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfkcTrie) lookupStringUnsafe(s string) uint16
lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfkcTrie) lookupStringUnsafe(s string) uint16
lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfcTrie) lookupStringUnsafe(s string) uint16
lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfcTrie) lookupStringUnsafe(s string) uint16
lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfkcTrie) lookupStringUnsafe(s string) uint16
lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfkcTrie) lookupStringUnsafe(s string) uint16
lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfcTrie) lookupStringUnsafe(s string) uint16
lookupUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfcTrie) lookupUnsafe(s []byte) uint16
lookupUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfkcTrie) lookupUnsafe(s []byte) uint16
lookupUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfcTrie) lookupUnsafe(s []byte) uint16
lookupUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfkcTrie) lookupUnsafe(s []byte) uint16
lookupUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfcTrie) lookupUnsafe(s []byte) uint16
lookupUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfcTrie) lookupUnsafe(s []byte) uint16
lookupUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfkcTrie) lookupUnsafe(s []byte) uint16
lookupUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfkcTrie) lookupUnsafe(s []byte) uint16
lookupUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfkcTrie) lookupUnsafe(s []byte) uint16
lookupUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfcTrie) lookupUnsafe(s []byte) uint16
lookupUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfkcTrie) lookupUnsafe(s []byte) uint16
lookupUnsafe returns the trie value for the first UTF-8 encoding in s. s must start with a full and valid UTF-8 encoded rune.
func (t *nfcTrie) lookupUnsafe(s []byte) uint16
lookupValue determines the type of block n and looks up the value for b.
func (t *nfcTrie) lookupValue(n uint32, b byte) uint16
lookupValue determines the type of block n and looks up the value for b.
func (t *nfcTrie) lookupValue(n uint32, b byte) uint16
lookupValue determines the type of block n and looks up the value for b.
func (t *nfcTrie) lookupValue(n uint32, b byte) uint16
lookupValue determines the type of block n and looks up the value for b.
func (t *nfcTrie) lookupValue(n uint32, b byte) uint16
lookupValue determines the type of block n and looks up the value for b.
func (t *nfkcTrie) lookupValue(n uint32, b byte) uint16
lookupValue determines the type of block n and looks up the value for b.
func (t *nfcTrie) lookupValue(n uint32, b byte) uint16
lookupValue determines the type of block n and looks up the value for b.
func (t *nfkcTrie) lookupValue(n uint32, b byte) uint16
lookupValue determines the type of block n and looks up the value for b.
func (t *nfkcTrie) lookupValue(n uint32, b byte) uint16
lookupValue determines the type of block n and looks up the value for b.
func (t *nfkcTrie) lookupValue(n uint32, b byte) uint16
lookupValue determines the type of block n and looks up the value for b.
func (t *nfkcTrie) lookupValue(n uint32, b byte) uint16
lookupValue determines the type of block n and looks up the value for b.
func (t *nfkcTrie) lookupValue(n uint32, b byte) uint16
lookupValue determines the type of block n and looks up the value for b.
func (t *nfcTrie) lookupValue(n uint32, b byte) uint16
func (p Properties) multiSegment() bool
func (p Properties) nLeadingNonStarters() uint8
func (p Properties) nTrailingNonStarters() uint8
func newNfcTrie(i int) *nfcTrie
func newNfcTrie(i int) *nfcTrie
func newNfcTrie(i int) *nfcTrie
func newNfcTrie(i int) *nfcTrie
func newNfcTrie(i int) *nfcTrie
func newNfcTrie(i int) *nfcTrie
func newNfkcTrie(i int) *nfkcTrie
func newNfkcTrie(i int) *nfkcTrie
func newNfkcTrie(i int) *nfkcTrie
func newNfkcTrie(i int) *nfkcTrie
func newNfkcTrie(i int) *nfkcTrie
func newNfkcTrie(i int) *nfkcTrie
next returns an ssState value to indicate whether a rune represented by p can be inserted.
func (ss *streamSafe) next(p Properties) ssState
func nextASCIIBytes(i *Iter) []byte
func nextASCIIString(i *Iter) []byte
func (f Form) nextBoundary(src input, nsrc int, atEOF bool) int
func nextCGJCompose(i *Iter) []byte
func nextCGJDecompose(i *Iter) []byte
nextComposed is the implementation of Next for forms NFC and NFKC.
func nextComposed(i *Iter) []byte
nextDecomposed is the implementation of Next for forms NFD and NFKD.
func nextDecomposed(i *Iter) (next []byte)
func nextDone(i *Iter) []byte
func nextHangul(i *Iter) []byte
nextMulti is used for iterating over multi-segment decompositions for decomposing normal forms.
func nextMulti(i *Iter) []byte
nextMultiNorm is used for iterating over multi-segment decompositions for composing normal forms.
func nextMultiNorm(i *Iter) []byte
patchTail fixes a case where a rune may be incorrectly normalized if it is followed by illegal continuation bytes. It patches the buffer rb and reports whether the decomposition is still in progress.
func patchTail(rb *reorderBuffer) bool
quickSpan returns a boundary n such that src[0:n] == f(src[0:n]) and whether any non-normalized parts were found. If atEOF is false, n will not point past the last segment if this segment might become non-normalized by appending other runes.
func (f *formInfo) quickSpan(src input, i int, end int, atEOF bool) (n int, ok bool)
reset discards all characters from the buffer.
func (rb *reorderBuffer) reset()
returnSlice returns a slice of the underlying input type as a byte slice. If the underlying input is of type []byte, it will simply return a slice. If the underlying input is of type string, it will copy the slice to the buffer and return that.
func (i *Iter) returnSlice(a int, b int) []byte
runeAt returns the rune at position n. It is used for Hangul and recomposition.
func (rb *reorderBuffer) runeAt(n int) rune
func (in *input) setBytes(str []byte)
func (i *Iter) setDone()
func (rb *reorderBuffer) setFlusher(out []byte, f func(*reorderBuffer) bool)
func (in *input) setString(str string)
func (in *input) skipASCII(p int, max int) int
func (in *input) skipContinuationBytes(p int) int
transform implements the transform.Transformer interface. It is only called when quickSpan does not pass for a given string.
func (f Form) transform(dst []byte, src []byte, atEOF bool) (nDst int, nSrc int, err error)
Generated with Arrow