language

Imports

Imports #

"errors"
"bytes"
"errors"
"fmt"
"sort"
"golang.org/x/text/internal/tag"
"golang.org/x/text/internal/tag"
"sort"
"strings"
"errors"
"fmt"
"strings"
"bytes"
"fmt"
"sort"
"strconv"
"golang.org/x/text/internal/tag"

Constants & Variables

AliasMap var #

AliasMap maps langIDs to their suggested replacements. Size: 772 bytes, 193 elements

var AliasMap = [193]FromTo{...}

AliasTypeUnknown const #

const AliasTypeUnknown AliasType = *ast.UnaryExpr

AliasTypes var #

Size: 193 bytes, 193 elements

var AliasTypes = [193]AliasType{...}

CLDRVersion const #

CLDRVersion is the CLDR version from which the tables in this package are derived.

const CLDRVersion = "32"

Deprecated const #

const Deprecated AliasType = iota

ErrDuplicateKey var #

ErrDuplicateKey is returned when a tag contains the same key twice with different values in the -u section.

var ErrDuplicateKey = *ast.CallExpr

ErrMissingLikelyTagsData var #

ErrMissingLikelyTagsData indicates no information was available to compute likely values of missing tags.

var ErrMissingLikelyTagsData = *ast.CallExpr

ErrSyntax var #

ErrSyntax is returned by any of the parsing functions when the input is not well-formed, according to BCP 47. TODO: return the position at which the syntax error occurred?

var ErrSyntax = *ast.CallExpr

Legacy const #

const Legacy

Macro const #

const Macro

NumLanguages const #

const NumLanguages = 8798

NumRegions const #

const NumRegions = 358

NumScripts const #

const NumScripts = 261

Und var #

Und is the root language.

var Und Tag

_001 const #

const _001 = 1

_419 const #

const _419 = 31

_BR const #

const _BR = 65

_CA const #

const _CA = 73

_ES const #

const _ES = 111

_GB const #

const _GB = 124

_Hani const #

const _Hani = 57

_Hans const #

const _Hans = 59

_Hant const #

const _Hant = 60

_Latn const #

const _Latn = 91

_MD const #

const _MD = 189

_PT const #

const _PT = 239

_Qaaa const #

const _Qaaa = 149

_Qaai const #

const _Qaai = 157

_Qabx const #

const _Qabx = 198

_UK const #

const _UK = 307

_US const #

const _US = 310

_XA const #

const _XA = 324

_XC const #

const _XC = 326

_XK const #

const _XK = 334

_ZZ const #

const _ZZ = 358

_Zinh const #

const _Zinh = 255

_Zyyy const #

const _Zyyy = 260

_Zzzz const #

const _Zzzz = 261

_af const #

const _af = 22

_am const #

const _am = 39

_ami const #

const _ami = 1650

_ar const #

const _ar = 58

_az const #

const _az = 88

_bg const #

const _bg = 126

_bn const #

const _bn = 165

_bnn const #

const _bnn = 2357

_ca const #

const _ca = 215

_cmn const #

const _cmn = 3007

_cs const #

const _cs = 250

_da const #

const _da = 257

_de const #

const _de = 269

_el const #

const _el = 310

_en const #

const _en = 313

_es const #

const _es = 318

_et const #

const _et = 320

_fa const #

const _fa = 328

_fi const #

const _fi = 337

_fil const #

const _fil = 339

_fr const #

const _fr = 350

_gu const #

const _gu = 420

_hak const #

const _hak = 438

_he const #

const _he = 444

_hi const #

const _hi = 446

_hr const #

const _hr = 465

_hsn const #

const _hsn = 467

_hu const #

const _hu = 469

_hy const #

const _hy = 471

_id const #

const _id = 481

_is const #

const _is = 504

_it const #

const _it = 505

_ja const #

const _ja = 512

_jbo const #

const _jbo = 515

_ka const #

const _ka = 528

_kk const #

const _kk = 578

_km const #

const _km = 586

_kn const #

const _kn = 593

_ko const #

const _ko = 596

_ky const #

const _ky = 650

_lb const #

const _lb = 661

_lo const #

const _lo = 696

_lt const #

const _lt = 704

_lv const #

const _lv = 711

_mk const #

const _mk = 767

_ml const #

const _ml = 772

_mn const #

const _mn = 779

_mo const #

const _mo = 784

_mr const #

const _mr = 795

_ms const #

const _ms = 799

_mul const #

const _mul = 806

_my const #

const _my = 817

_nan const #

const _nan = 835

_nb const #

const _nb = 839

_ne const #

const _ne = 849

_nl const #

const _nl = 871

_nn const #

const _nn = 874

_no const #

const _no = 879

_nv const #

const _nv = 899

_pa const #

const _pa = 925

_pl const #

const _pl = 947

_pt const #

const _pt = 960

_pwn const #

const _pwn = 12055

_ro const #

const _ro = 988

_ru const #

const _ru = 994

_sfb const #

const _sfb = 13629

_sgg const #

const _sgg = 13660

_sh const #

const _sh = 1031

_si const #

const _si = 1036

_sk const #

const _sk = 1042

_sl const #

const _sl = 1046

_sq const #

const _sq = 1073

_sr const #

const _sr = 1074

_sv const #

const _sv = 1092

_sw const #

const _sw = 1093

_ta const #

const _ta = 1104

_tao const #

const _tao = 14188

_tay const #

const _tay = 14198

_te const #

const _te = 1121

_th const #

const _th = 1131

_tl const #

const _tl = 1146

_tlh const #

const _tlh = 14467

_tn const #

const _tn = 1152

_tr const #

const _tr = 1162

_tsu const #

const _tsu = 14662

_uk const #

const _uk = 1198

_ur const #

const _ur = 1204

_uz const #

const _uz = 1212

_vgt const #

const _vgt = 15701

_vi const #

const _vi = 1219

_zh const #

const _zh = 1321

_zu const #

const _zu = 1327

altLangISO3 const #

altLangISO3 holds an alphabetically sorted list of 3-letter language code alternatives to 2-letter language codes that cannot be derived using the method described above. Each 3-letter code is followed by its 1-byte langID.

const altLangISO3 tag.Index = "---\x00cor\x00hbs\x01heb\x02kin\x03spa\x04yid\x05\xff\xff\xff\xff"

altLangIndex var #

altLangIndex is used to convert indexes in altLangISO3 to langIDs. Size: 12 bytes, 6 elements

var altLangIndex = [6]uint16{...}

altRegionIDs var #

altRegionIDs holds a list of regionIDs the positions of which match those of the 3-letter ISO codes in altRegionISO3. Size: 22 bytes, 11 elements

var altRegionIDs = [11]uint16{...}

altRegionISO3 const #

altRegionISO3 holds a list of 3-letter region codes that cannot be mapped to 2-letter codes using the default algorithm. This is a short list.

const altRegionISO3 string = "SCGQUUSGSCOMPRKCYMSPMSRBATFMYTATN"

altTagIndex var #

var altTagIndex = [...]uint8{...}

altTags var #

var altTags = "xtg-x-cel-gaulishen-GB-oxendicten-x-i-defaultund-x-i-enochiansee-x-i-mingonan-x-zh-minen-US-u-va-posix"

base const #

const base = *ast.BinaryExpr

bcp47Region const #

const bcp47Region

ccTLD const #

const ccTLD

errInvalidArguments var #

var errInvalidArguments = *ast.CallExpr

errNoTLD var #

var errNoTLD = *ast.CallExpr

errPrivateUse var #

var errPrivateUse = *ast.CallExpr

fromM49 var #

fromM49 contains entries to map UN.M49 codes to regions. See m49Index for details. Size: 666 bytes, 333 elements

var fromM49 = [333]uint16{...}

grandfatheredMap var #

grandfatheredMap holds a mapping from legacy and grandfathered tags to their base language or index to more elaborate tag.

var grandfatheredMap = map[[maxLen]byte]int16{...}

isList const #

const isList = *ast.BinaryExpr

iso3166UserAssigned const #

const iso3166UserAssigned = *ast.BinaryExpr

isoRegionOffset const #

isoRegionOffset needs to be added to the index of regionISO to obtain the regionID for 2-letter ISO codes. (The first isoRegionOffset regionIDs are reserved for the UN.M49 codes used for groups.)

const isoRegionOffset = 32

lang const #

lang holds an alphabetically sorted list of ISO-639 language identifiers. All entries are 4 bytes. The index of the identifier (divided by 4) is the language tag. For 2-byte language identifiers, the two successive bytes have the following meaning: - if the first letter of the 2- and 3-letter ISO codes are the same: the second and third letter of the 3-letter ISO code. - otherwise: a 0 and an index into altLangISO3, right-shifted by 2 bits. For 3-byte language identifiers the 4th byte is 0.

const lang tag.Index = *ast.BinaryExpr

langNoIndex var #

langNoIndex is a bit vector of all 3-letter language codes that are not used as an index in lookup tables. The language ids for these language codes are derived directly from the letters and are not consecutive. Size: 2197 bytes, 2197 elements

var langNoIndex = [2197]uint8{...}

langNoIndexOffset const #

const langNoIndexOffset = 1330

langPrivateEnd const #

const langPrivateEnd = 0x3179

langPrivateStart const #

const langPrivateStart = 0x2f72

likelyLang var #

likelyLang is a lookup table, indexed by langID, for the most likely scripts and regions given incomplete information. If more entries exist for a given language, region and script are the index and size respectively of the list in likelyLangList. Size: 7980 bytes, 1330 elements

var likelyLang = [1330]likelyScriptRegion{...}

likelyLangList var #

likelyLangList holds lists info associated with likelyLang. Size: 582 bytes, 97 elements

var likelyLangList = [97]likelyScriptRegion{...}

likelyRegion var #

likelyRegion is a lookup table, indexed by regionID, for the most likely languages and scripts given incomplete information. If more entries exist for a given regionID, lang and script are the index and size respectively of the list in likelyRegionList. TODO: exclude containers and user-definable regions from the list. Size: 2154 bytes, 359 elements

var likelyRegion = [359]likelyLangScript{...}

likelyRegionGroup var #

Size: 198 bytes, 33 elements

var likelyRegionGroup = [33]likelyTag{...}

likelyRegionList var #

likelyRegionList holds lists info associated with likelyRegion. Size: 558 bytes, 93 elements

var likelyRegionList = [93]likelyLangScript{...}

likelyScript var #

likelyScript is a lookup table, indexed by scriptID, for the most likely languages and regions given a script. Size: 1052 bytes, 263 elements

var likelyScript = [263]likelyLangRegion{...}

m49 var #

m49 maps regionIDs to UN.M49 codes. The first isoRegionOffset entries are codes indicating collections of regions. Size: 718 bytes, 359 elements

var m49 = [359]int16{...}

m49Index var #

m49Index gives indexes into fromM49 based on the three most significant bits of a 10-bit UN.M49 code. To search a UN.M49 code in fromM49, search in fromM49[m49Index[msb3(code)]:m49Index[msb3(code)+1]] for an entry where the first 7 bits match the 7 lsb of the UN.M49 code. The region code is stored in the 9 lsb of the indexed value. Size: 18 bytes, 9 elements

var m49Index = [9]int16{...}

max99thPercentileSize const #

max99thPercentileSize is a somewhat arbitrary buffer size that presumably is large enough to hold at least 99% of the BCP 47 tags.

const max99thPercentileSize = 32

maxAltTaglen const #

const maxAltTaglen = *ast.CallExpr

maxCoreSize const #

maxCoreSize is the maximum size of a BCP 47 tag without variants and extensions. Equals max lang (3) + script (4) + max reg (3) + 2 dashes.

const maxCoreSize = 12

maxLen const #

const maxLen = maxAltTaglen

maxSimpleUExtensionSize const #

maxSimpleUExtensionSize is the maximum size of a -u extension with one key-type pair. Equals len("-u-") + key (2) + dash + max value (8).

const maxSimpleUExtensionSize = 14

nRegionGroups const #

nRegionGroups is the number of region groups.

const nRegionGroups = 33

nonCanonicalUnd const #

const nonCanonicalUnd = 1201

parents var #

Size: 414 bytes, 5 elements

var parents = [5]parentRel{...}

regionContainment var #

Size: 264 bytes, 33 elements

var regionContainment = [33]uint64{...}

regionISO const #

regionISO holds a list of alphabetically sorted 2-letter ISO region codes. Each 2-letter code is followed by two bytes with the following meaning: - [A-Z]{2}: the first letter of the 2-letter code plus these two letters form the 3-letter ISO code. - 0, n: index into altRegionISO3.

const regionISO tag.Index = *ast.BinaryExpr

regionInFrom const #

const regionInFrom

regionInclusion var #

regionInclusion maps region identifiers to sets of regions in regionInclusionBits, where each set holds all groupings that are directly connected in a region containment graph. Size: 359 bytes, 359 elements

var regionInclusion = [359]uint8{...}

regionInclusionBits var #

regionInclusionBits is an array of bit vectors where every vector represents a set of region groupings. These sets are used to compute the distance between two regions for the purpose of language matching. Size: 584 bytes, 73 elements

var regionInclusionBits = [73]uint64{...}

regionInclusionNext var #

regionInclusionNext marks, for each entry in regionInclusionBits, the set of all groups that are reachable from the groups set in the respective entry. Size: 73 bytes, 73 elements

var regionInclusionNext = [73]uint8{...}

regionOldMap var #

Size: 80 bytes, 20 elements

var regionOldMap = [20]FromTo{...}

regionTypes var #

regionTypes defines the status of a region for various standards. Size: 359 bytes, 359 elements

var regionTypes = [359]uint8{...}

script const #

script is an alphabetically sorted list of ISO 15924 codes. The index of the script in the string, divided by 4, is the internal scriptID.

const script tag.Index = *ast.BinaryExpr

scriptInFrom const #

const scriptInFrom

separator var #

var separator = []byte{...}

suppressScript var #

suppressScript is an index from langID to the dominant script for that language, if it exists. If a script is given, it should be suppressed from the language tag. Size: 1330 bytes, 1330 elements

var suppressScript = [1330]uint8{...}

variantIndex var #

Size: 2128 bytes

var variantIndex = map[string]uint8{...}

variantNumSpecialized const #

variantNumSpecialized is the number of specialized variants in variants.

const variantNumSpecialized = 105

Type Aliases

AliasType type #

AliasType is the type of an alias in AliasMap.

type AliasType int8

CompactCoreInfo type #

CompactCoreInfo is a compact integer with the three core tags encoded.

type CompactCoreInfo uint32

Language type #

type Language uint16

Region type #

type Region uint16

Script type #

type Script uint16

scriptRegionFlags type #

type scriptRegionFlags uint8

sortVariants type #

type sortVariants []string

Structs

Builder struct #

A Builder allows constructing a Tag from individual components. Its main user is Compose in the top-level language package.

type Builder struct {
Tag Tag
private string
variants []string
extensions []string
}

FromTo struct #

type FromTo struct {
From uint16
To uint16
}

Tag struct #

Tag represents a BCP 47 language tag. It is used to specify an instance of a specific language or locale. All language tag values are guaranteed to be well-formed. The zero value of Tag is Und.

type Tag struct {
LangID Language
RegionID Region
ScriptID Script
pVariant byte
pExt uint16
str string
}

ValueError struct #

ValueError is returned by any of the parsing functions when the input is well-formed but the respective subtag is not recognized as a valid value.

type ValueError struct {
v [8]byte
}

Variant struct #

Variant represents a registered variant of a language as defined by BCP 47.

type Variant struct {
ID uint8
str string
}

bytesSort struct #

type bytesSort struct {
b [][]byte
n int
}

likelyLangRegion struct #

type likelyLangRegion struct {
lang uint16
region uint16
}

likelyLangScript struct #

type likelyLangScript struct {
lang uint16
script uint16
flags uint8
}

likelyScriptRegion struct #

type likelyScriptRegion struct {
region uint16
script uint16
flags uint8
}

likelyTag struct #

type likelyTag struct {
lang uint16
region uint16
script uint16
}

parentRel struct #

type parentRel struct {
lang uint16
script uint16
maxScript uint16
toRegion uint16
fromRegion []uint16
}

scanner struct #

scanner is used to scan BCP 47 tokens, which are separated by _ or -.

type scanner struct {
b []byte
bytes [max99thPercentileSize]byte
token []byte
start int
end int
next int
err error
done bool
}

variantsSort struct #

type variantsSort struct {
i []uint8
v [][]byte
}

Functions

AddExt method #

AddExt adds extension e to the tag. e must be a valid extension as returned by Tag.Extension. If the extension already exists, it will be discarded, except for a -u extension, where non-existing key-type pairs will be added.

func (b *Builder) AddExt(e string)

AddVariant method #

AddVariant adds any number of variants.

func (b *Builder) AddVariant(v ...string)

BaseLanguages function #

BaseLanguages returns the list of all supported base languages. It generates the list by traversing the internal structures.

func BaseLanguages() []Language

Canonicalize method #

Canonicalize returns the region or a possible replacement if the region is deprecated. It will not return a replacement for deprecated regions that are split into multiple regions.

func (r Region) Canonicalize() Region

Canonicalize method #

func (id Language) Canonicalize() (Language, AliasType)

ClearExtensions method #

ClearExtensions removes any extensions previously added, including those copied from a Tag in SetTag.

func (b *Builder) ClearExtensions()

ClearVariants method #

ClearVariants removes any variants previously added, including those copied from a Tag in SetTag.

func (b *Builder) ClearVariants()

Contains method #

Contains returns whether Region c is contained by Region r. It returns true if c == r.

func (r Region) Contains(c Region) bool

EncodeM49 function #

EncodeM49 returns the Region for the given UN M.49 code. It returns an error if r is not a valid code.

func EncodeM49(r int) (Region, error)

Error method #

Error implements the error interface.

func (e ValueError) Error() string

Extension method #

Extension returns the extension of type x for tag t. It will return false for ok if t does not have the requested extension. The returned extension will be invalid in this case.

func (t Tag) Extension(x byte) (ext string, ok bool)

Extensions method #

Extensions returns all extensions of t.

func (t Tag) Extensions() []string

GetCompactCore function #

GetCompactCore generates a uint32 value that is guaranteed to be unique for different language, region, and script values.

func GetCompactCore(t Tag) (cci CompactCoreInfo, ok bool)

HasExtensions method #

HasExtensions reports whether t has extensions.

func (t Tag) HasExtensions() bool

HasString method #

HasString reports whether this tag defines more than just the raw components.

func (t Tag) HasString() bool

HasVariants method #

HasVariants reports whether t has variants.

func (t Tag) HasVariants() bool

ISO3 method #

ISO3 returns the 3-letter ISO code of r. Note that not all regions have a 3-letter ISO code. In such cases this method returns "ZZZ".

func (r Region) ISO3() string

ISO3 method #

ISO3 returns the ISO 639-3 language code.

func (b Language) ISO3() string

IsCountry method #

IsCountry returns whether this region is a country or autonomous area. This includes non-standard definitions from CLDR.

func (r Region) IsCountry() bool

IsGroup method #

IsGroup returns whether this region defines a collection of regions. This includes non-standard definitions from CLDR.

func (r Region) IsGroup() bool

IsPrivateUse method #

IsPrivateUse reports whether r has the ISO 3166 User-assigned status. This may include private-use tags that are assigned by CLDR and used in this implementation. So IsPrivateUse and IsCountry can be simultaneously true.

func (r Region) IsPrivateUse() bool

IsPrivateUse method #

IsPrivateUse reports whether this script code is reserved for private use.

func (s Script) IsPrivateUse() bool

IsPrivateUse method #

IsPrivateUse reports whether this language code is reserved for private use.

func (b Language) IsPrivateUse() bool

IsPrivateUse method #

IsPrivateUse reports whether the Tag consists solely of a private use tag.

func (t Tag) IsPrivateUse() bool

IsRoot method #

IsRoot returns true if t is equal to language "und".

func (t Tag) IsRoot() bool

Len method #

func (b bytesSort) Len() int

Len method #

func (s sortVariants) Len() int

Len method #

func (s variantsSort) Len() int

Less method #

func (b bytesSort) Less(i int, j int) bool

Less method #

func (s sortVariants) Less(i int, j int) bool

Less method #

func (s variantsSort) Less(i int, j int) bool

M49 method #

M49 returns the UN M.49 encoding of r, or 0 if this encoding is not defined for r.

func (r Region) M49() int

Make method #

Make returns a new Tag from the current settings.

func (b *Builder) Make() Tag

Make function #

Make is a convenience wrapper for Parse that omits the error. In case of an error, a sensible default is returned.

func Make(s string) Tag

MarshalText method #

MarshalText implements encoding.TextMarshaler.

func (t Tag) MarshalText() (text []byte, err error)

Maximize method #

Maximize returns a new tag with missing tags filled in.

func (t Tag) Maximize() (Tag, error)

MustParse function #

MustParse is like Parse, but panics if the given BCP 47 tag cannot be parsed. It simplifies safe initialization of Tag values.

func MustParse(s string) Tag

MustParseBase function #

MustParseBase is like ParseBase, but panics if the given base cannot be parsed. It simplifies safe initialization of Base values.

func MustParseBase(s string) Language

MustParseRegion function #

MustParseRegion is like ParseRegion, but panics if the given region cannot be parsed. It simplifies safe initialization of Region values.

func MustParseRegion(s string) Region

MustParseScript function #

MustParseScript is like ParseScript, but panics if the given script cannot be parsed. It simplifies safe initialization of Script values.

func MustParseScript(s string) Script

NewValueError function #

NewValueError creates a new ValueError.

func NewValueError(tag []byte) ValueError

Parent method #

Parent returns the CLDR parent of t. In CLDR, missing fields in data for a specific language are substituted with fields from the parent language. The parent for a language may change for newer versions of CLDR.

func (t Tag) Parent() Tag

Parse function #

Parse parses the given BCP 47 string and returns a valid Tag. If parsing failed it returns an error and any part of the tag that could be parsed. If parsing succeeded but an unknown value was found, it returns ValueError. The Tag returned in this case is just stripped of the unknown value. All other values are preserved. It accepts tags in the BCP 47 format and extensions to this standard defined in https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.

func Parse(s string) (t Tag, err error)

ParseBase function #

ParseBase parses a 2- or 3-letter ISO 639 code. It returns a ValueError if s is a well-formed but unknown language identifier or another error if another error occurred.

func ParseBase(s string) (l Language, err error)

ParseExtension function #

ParseExtension parses s as an extension and returns it on success.

func ParseExtension(s string) (ext string, err error)

ParseRegion function #

ParseRegion parses a 2- or 3-letter ISO 3166-1 or a UN M.49 code. It returns a ValueError if s is a well-formed but unknown region identifier or another error if another error occurred.

func ParseRegion(s string) (r Region, err error)

ParseScript function #

ParseScript parses a 4-letter ISO 15924 code. It returns a ValueError if s is a well-formed but unknown script identifier or another error if another error occurred.

func ParseScript(s string) (scr Script, err error)

ParseVariant function #

ParseVariant parses and returns a Variant. An error is returned if s is not a valid variant.

func ParseVariant(s string) (v Variant, err error)

Raw method #

Raw returns the raw base language, script and region, without making an attempt to infer their values. TODO: consider removing

func (t Tag) Raw() (b Language, s Script, r Region)

RemakeString method #

RemakeString is used to update t.str in case lang, script or region changed. It is assumed that pExt and pVariant still point to the start of the respective parts.

func (t *Tag) RemakeString()

SetExt method #

SetExt sets the extension e to the tag. e must be a valid extension as returned by Tag.Extension. If the extension already exists, it will be overwritten, except for a -u extension, where the individual key-type pairs will be set.

func (b *Builder) SetExt(e string)

SetTag method #

SetTag copies all the settings from a given Tag. Any previously set values are discarded.

func (b *Builder) SetTag(t Tag)

SetTypeForKey method #

SetTypeForKey returns a new Tag with the key set to type, where key and type are of the allowed values defined for the Unicode locale extension ('u') in https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers. An empty value removes an existing pair with the same key.

func (t Tag) SetTypeForKey(key string, value string) (Tag, error)

String method #

String returns the canonical string representation of the language tag.

func (t Tag) String() string

String method #

String returns the script code in title case. It returns "Zzzz" for an unspecified script.

func (s Script) String() string

String method #

String returns the BCP 47 representation for the region. It returns "ZZ" for an unspecified region.

func (r Region) String() string

String method #

String returns the BCP 47 representation of the langID. Use b as variable name, instead of id, to ensure the variable used is consistent with that of Base in which this type is embedded.

func (b Language) String() string

String method #

String returns the string representation of the variant.

func (v Variant) String() string

StringToBuf method #

StringToBuf writes the string to b and returns the number of bytes written. cap(b) must be >= 3.

func (id Language) StringToBuf(b []byte) int

Subtag method #

Subtag returns the subtag for which the error occurred.

func (e ValueError) Subtag() string

SuppressScript method #

SuppressScript returns the script marked as SuppressScript in the IANA language tag repository, or 0 if there is no such script.

func (b Language) SuppressScript() Script

Swap method #

func (b bytesSort) Swap(i int, j int)

Swap method #

func (s variantsSort) Swap(i int, j int)

Swap method #

func (s sortVariants) Swap(i int, j int)

TLD method #

TLD returns the country code top-level domain (ccTLD). UK is returned for GB. In all other cases it returns either the region itself or an error. This method may return an error for a region for which there exists a canonical form with a ccTLD. To get that ccTLD canonicalize r first. The region will already be canonicalized if it was obtained from a Tag that was obtained using any of the default methods.

func (r Region) TLD() (Region, error)

Tag method #

Tag generates a tag from c.

func (c CompactCoreInfo) Tag() Tag

TypeForKey method #

TypeForKey returns the type associated with the given key, where key and type are of the allowed values defined for the Unicode locale extension ('u') in https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers. TypeForKey will traverse the inheritance chain to get the correct value. If there are multiple types associated with a key, only the first will be returned. If there is no type associated with a key, it returns the empty string.

func (t Tag) TypeForKey(key string) string

UnmarshalText method #

UnmarshalText implements encoding.TextUnmarshaler.

func (t *Tag) UnmarshalText(text []byte) error

VariantOrPrivateUseTags method #

VariantOrPrivateUseTags returns variants or private use tags.

func (t Tag) VariantOrPrivateUseTags() string

Variants method #

Variants returns the part of the tag holding all variants or the empty string if there are no variants defined.

func (t Tag) Variants() string

acceptMinSize method #

acceptMinSize parses multiple tokens of the given size or greater. It returns the end position of the last token consumed.

func (s *scanner) acceptMinSize(min int) (end int)

addLikelySubtags method #

addLikelySubtags sets subtags to their most likely value, given the locale. In most cases this means setting fields for unknown values, but in some cases it may alter a value. It returns an ErrMissingLikelyTagsData error if the given locale cannot be expanded.

func (t Tag) addLikelySubtags() (Tag, error)

addTags function #

func addTags(t Tag) (Tag, error)

appendTokens function #

func appendTokens(b []byte, token ...string) int

deleteRange method #

deleteRange removes the given range from s.b before the current token.

func (s *scanner) deleteRange(start int, end int)

equalTags method #

equalTags compares language, script and region subtags only.

func (t Tag) equalTags(a Tag) bool

findIndex function #

findIndex tries to find the given tag in idx and returns a standardized error if it could not be found.

func findIndex(idx tag.Index, key []byte, form string) (index int, err error)

findTypeForKey method #

findTypeForKey returns the start and end position for the type corresponding to key or the point at which to insert the key-value pair if the type wasn't found. The hasExt return value reports whether an -u extension was present. Note: the extensions are typically very small and are likely to contain only one key-type pair.

func (t Tag) findTypeForKey(key string) (start int, sep int, end int, hasExt bool)

genCoreBytes method #

genCoreBytes writes a string for the base languages, script and region tags to the given buffer and returns the number of bytes written. It will never write more than maxCoreSize bytes.

func (t *Tag) genCoreBytes(buf []byte) int

getExtension function #

getExtension returns the name, body and end position of the extension.

func getExtension(s string, p int) (end int, ext string)

getLangID function #

getLangID returns the langID of s if s is a canonical subtag or langUnknown if s is not a canonical subtag.

func getLangID(s []byte) (Language, error)

getLangISO2 function #

getLangISO2 returns the langID for the given 2-letter ISO language code or unknownLang if this does not exist.

func getLangISO2(s []byte) (Language, error)

getLangISO3 function #

getLangISO3 returns the langID for the given 3-letter ISO language code or unknownLang if this does not exist.

func getLangISO3(s []byte) (Language, error)

getRegionID function #

getRegionID returns the region id for s if s is a valid 2-letter region code or unknownRegion.

func getRegionID(s []byte) (Region, error)

getRegionISO2 function #

getRegionISO2 returns the regionID for the given 2-letter ISO country code or unknownRegion if this does not exist.

func getRegionISO2(s []byte) (Region, error)

getRegionISO3 function #

getRegionISO3 returns the regionID for the given 3-letter ISO country code or unknownRegion if this does not exist.

func getRegionISO3(s []byte) (Region, error)

getRegionM49 function #

func getRegionM49(n int) (Region, error)

getScriptID function #

getScriptID returns the script id for string s. It assumes that s is of the format [A-Z][a-z]{3}.

func getScriptID(idx tag.Index, s []byte) (Script, error)

gobble method #

gobble removes the current token from the input. Caller must call scan after calling gobble.

func (s *scanner) gobble(e error)

grandfathered function #

func grandfathered(s [maxAltTaglen]byte) (t Tag, ok bool)

init method #

func (s *scanner) init()

intToStr function #

intToStr converts the given integer to the original ASCII string passed to strToInt. len(s) must match the number of characters obtained.

func intToStr(v uint, s []byte)

isAlpha function #

isAlpha returns true if the byte is not a digit. b must be an ASCII letter or digit.

func isAlpha(b byte) bool

isAlphaNum function #

isAlphaNum returns true if the string contains only ASCII letters or digits.

func isAlphaNum(s []byte) bool

makeScanner function #

makeScanner returns a scanner using b as the input buffer. b is not copied and may be modified by the scanner routines.

func makeScanner(b []byte) scanner

makeScannerString function #

func makeScannerString(s string) scanner

minimize method #

minimize removes the region or script subtags from t such that t.addLikelySubtags() == t.minimize().addLikelySubtags().

func (t Tag) minimize() (Tag, error)

minimizeTags function #

minimizeTags mimics the behavior of the ICU 51 C implementation.

func minimizeTags(t Tag) (Tag, error)

nextExtension function #

nextExtension finds the next extension within the string, searching for the -<char>- pattern from position p. In the vast majority of cases, language tags will have at most one extension and extensions tend to be small.

func nextExtension(s string, p int) int

normLang function #

normLang returns the mapped langID of id according to mapping m.

func normLang(id Language) (Language, AliasType)

normRegion function #

normRegion returns a region if r is deprecated or 0 otherwise. TODO: consider supporting BYS (-> BLR), CSK (-> 200 or CZ), PHI (-> PHL) and AFI (-> DJ). TODO: consider mapping split up regions to new most populous one (like CLDR).

func normRegion(r Region) Region

parse function #

func parse(scan *scanner, s string) (t Tag, err error)

parseExtension function #

parseExtension parses a single extension and returns the position of the extension end.

func parseExtension(scan *scanner) int

parseExtensions function #

parseExtensions parses and normalizes the extensions in the buffer. It returns the last position of scan.b that is part of any extension. It also trims scan.b to remove excess parts accordingly.

func parseExtensions(scan *scanner) int

parseTag function #

parseTag parses language, script, region and variants. It returns a Tag and the end position in the input that was parsed. If doNorm is true, then <lang>-<extlang> will be normalized to <extlang>.

func parseTag(scan *scanner, doNorm bool) (t Tag, end int)

parseVariants function #

parseVariants scans tokens as long as each token is a valid variant string. Duplicate variants are removed.

func parseVariants(scan *scanner, end int, t Tag) int

replace method #

replace replaces the current token with repl.

func (s *scanner) replace(repl string)

resizeRange method #

resizeRange shrinks or grows the array at position oldStart such that a new string of size newSize can fit between oldStart and oldEnd. Sets the scan point to after the resized range.

func (s *scanner) resizeRange(oldStart int, oldEnd int, newSize int)

scan method #

scan parses the next token of a BCP 47 string. Tokens that are larger than 8 characters or include non-alphanumeric characters result in an error and are gobbled and removed from the output. It returns the end position of the last token consumed.

func (s *scanner) scan() (end int)

searchUint function #

func searchUint(imap []uint16, key uint16) int

setError method #

func (s *scanner) setError(e error)

setTagsFrom method #

func (t *Tag) setTagsFrom(id Tag)

setUndefinedLang method #

func (t *Tag) setUndefinedLang(id Language)

setUndefinedRegion method #

func (t *Tag) setUndefinedRegion(id Region)

setUndefinedScript method #

func (t *Tag) setUndefinedScript(id Script)

specializeRegion function #

specializeRegion attempts to specialize a group region.

func specializeRegion(t *Tag) bool

strToInt function #

func strToInt(s []byte) uint

tag method #

func (e ValueError) tag() []byte

toLower method #

toLower converts the string between start and end to lower case.

func (s *scanner) toLower(start int, end int)

tokenLen function #

func tokenLen(token ...string) (n int)

typ method #

func (r Region) typ() byte

Generated with Arrow