syntax

Imports

Imports #

"strconv"
"strings"
"unicode"
"unicode/utf8"
"slices"
"strconv"
"strings"
"unicode"
"unicode"
"strconv"
"sort"
"strings"
"unicode"
"unicode/utf8"

Constants & Variables

ClassNL const #

const ClassNL

DotNL const #

const DotNL

EmptyBeginLine const #

const EmptyBeginLine EmptyOp = *ast.BinaryExpr

EmptyBeginText const #

const EmptyBeginText

EmptyEndLine const #

const EmptyEndLine

EmptyEndText const #

const EmptyEndText

EmptyNoWordBoundary const #

const EmptyNoWordBoundary

EmptyWordBoundary const #

const EmptyWordBoundary

ErrInternalError const #

Unexpected error

const ErrInternalError ErrorCode = "regexp/syntax: internal error"

ErrInvalidCharClass const #

Parse errors

const ErrInvalidCharClass ErrorCode = "invalid character class"

ErrInvalidCharRange const #

const ErrInvalidCharRange ErrorCode = "invalid character class range"

ErrInvalidEscape const #

const ErrInvalidEscape ErrorCode = "invalid escape sequence"

ErrInvalidNamedCapture const #

const ErrInvalidNamedCapture ErrorCode = "invalid named capture"

ErrInvalidPerlOp const #

const ErrInvalidPerlOp ErrorCode = "invalid or unsupported Perl syntax"

ErrInvalidRepeatOp const #

const ErrInvalidRepeatOp ErrorCode = "invalid nested repetition operator"

ErrInvalidRepeatSize const #

const ErrInvalidRepeatSize ErrorCode = "invalid repeat count"

ErrInvalidUTF8 const #

const ErrInvalidUTF8 ErrorCode = "invalid UTF-8"

ErrLarge const #

const ErrLarge ErrorCode = "expression too large"

ErrMissingBracket const #

const ErrMissingBracket ErrorCode = "missing closing ]"

ErrMissingParen const #

const ErrMissingParen ErrorCode = "missing closing )"

ErrMissingRepeatArgument const #

const ErrMissingRepeatArgument ErrorCode = "missing argument to repetition operator"

ErrNestingDepth const #

const ErrNestingDepth ErrorCode = "expression nests too deeply"

ErrTrailingBackslash const #

const ErrTrailingBackslash ErrorCode = "trailing backslash at end of expression"

ErrUnexpectedParen const #

const ErrUnexpectedParen ErrorCode = "unexpected )"

FoldCase const #

const FoldCase Flags = *ast.BinaryExpr

InstAlt const #

const InstAlt InstOp = iota

InstAltMatch const #

const InstAltMatch

InstCapture const #

const InstCapture

InstEmptyWidth const #

const InstEmptyWidth

InstFail const #

const InstFail

InstMatch const #

const InstMatch

InstNop const #

const InstNop

InstRune const #

const InstRune

InstRune1 const #

const InstRune1

InstRuneAny const #

const InstRuneAny

InstRuneAnyNotNL const #

const InstRuneAnyNotNL

Literal const #

const Literal

MatchNL const #

const MatchNL = *ast.BinaryExpr

NonGreedy const #

const NonGreedy

OneLine const #

const OneLine

OpAlternate const #

const OpAlternate

OpAnyChar const #

const OpAnyChar

OpAnyCharNotNL const #

const OpAnyCharNotNL

OpBeginLine const #

const OpBeginLine

OpBeginText const #

const OpBeginText

OpCapture const #

const OpCapture

OpCharClass const #

const OpCharClass

OpConcat const #

const OpConcat

OpEmptyMatch const #

const OpEmptyMatch

OpEndLine const #

const OpEndLine

OpEndText const #

const OpEndText

OpLiteral const #

const OpLiteral

OpNoMatch const #

const OpNoMatch Op = *ast.BinaryExpr

OpNoWordBoundary const #

const OpNoWordBoundary

OpPlus const #

const OpPlus

OpQuest const #

const OpQuest

OpRepeat const #

const OpRepeat

OpStar const #

const OpStar

OpWordBoundary const #

const OpWordBoundary

POSIX const #

const POSIX Flags = 0

Perl const #

const Perl = *ast.BinaryExpr

PerlX const #

const PerlX

Simple const #

const Simple

UnicodeGroups const #

const UnicodeGroups

WasDollar const #

const WasDollar

_Op_index_0 var #

var _Op_index_0 = [...]uint8{...}

_Op_name_0 const #

const _Op_name_0 = "NoMatchEmptyMatchLiteralCharClassAnyCharNotNLAnyCharBeginLineEndLineBeginTextEndTextWordBoundaryNoWordBoundaryCaptureStarPlusQuestRepeatConcatAlternate"

_Op_name_1 const #

const _Op_name_1 = "opPseudo"

anyRune var #

var anyRune = []rune{...}

anyRuneNotNL var #

var anyRuneNotNL = []rune{...}

anyTable var #

var anyTable = *ast.UnaryExpr

code1 var #

var code1 = []rune{...}

code10 var #

var code10 = []rune{...}

code11 var #

var code11 = []rune{...}

code12 var #

var code12 = []rune{...}

code13 var #

var code13 = []rune{...}

code14 var #

var code14 = []rune{...}

code15 var #

var code15 = []rune{...}

code16 var #

var code16 = []rune{...}

code17 var #

var code17 = []rune{...}

code2 var #

var code2 = []rune{...}

code3 var #

var code3 = []rune{...}

code4 var #

var code4 = []rune{...}

code5 var #

var code5 = []rune{...}

code6 var #

var code6 = []rune{...}

code7 var #

var code7 = []rune{...}

code8 var #

var code8 = []rune{...}

code9 var #

var code9 = []rune{...}

flagI const #

const flagI printFlags = *ast.BinaryExpr

flagM const #

const flagM

flagOff const #

const flagOff

flagPrec const #

const flagPrec

flagS const #

const flagS

instOpNames var #

var instOpNames = []string{...}

instSize const #

maxSize is the maximum size of a compiled regexp in Insts. It too is somewhat arbitrarily chosen, but the idea is to be large enough to allow significant regexps while at the same time small enough that the compiled form will not take up too much memory. 128 MB is enough for a 3.3 million Inst structures, which roughly corresponds to a 3.3 MB regexp.

const instSize = *ast.BinaryExpr

maxFold const #

const maxFold = 0x1e943

maxHeight const #

maxHeight is the maximum height of a regexp parse tree. It is somewhat arbitrarily chosen, but the idea is to be large enough that no one will actually hit in real use but at the same time small enough that recursion on the Regexp tree will not hit the 1GB Go stack limit. The maximum amount of stack for a single recursive frame is probably closer to 1kB, so this could potentially be raised, but it seems unlikely that people have regexps nested even this deeply. We ran a test on Google's C++ code base and turned up only a single use case with depth > 100; it had depth 128. Using depth 1000 should be plenty of margin. As an optimization, we don't even bother calculating heights until we've allocated at least maxHeight Regexp structures.

const maxHeight = 1000

maxRunes const #

maxRunes is the maximum number of runes allowed in a regexp tree counting the runes in all the nodes. Ignoring character classes p.numRunes is always less than the length of the regexp. Character classes can make it much larger: each \pL adds 1292 runes. 128 MB is enough for 32M runes, which is over 26k \pL instances. Note that repetitions do not make copies of the rune slices, so \pL{1000} is only one rune slice, not 1000. We could keep a cache of character classes we've seen, so that all the \pL we see use the same rune list, but that doesn't remove the problem entirely: consider something like [\pL01234][\pL01235][\pL01236]...[\pL^&*()]. And because the Rune slice is exposed directly in the Regexp, there is not an opportunity to change the representation to allow partial sharing between different character classes. So the limit is the best we can do.

const maxRunes = *ast.BinaryExpr

maxSize const #

const maxSize = *ast.BinaryExpr

meta const #

const meta = `\.+*?()|[]{}^$`

minFold const #

minimum and maximum runes involved in folding. checked during test.

const minFold = 0x0041

negShift const #

const negShift = 5

noMatch const #

const noMatch = *ast.UnaryExpr

opLeftParen const #

Pseudo-ops for parsing stack.

const opLeftParen = *ast.BinaryExpr

opPseudo const #

const opPseudo Op = 128

opVerticalBar const #

Pseudo-ops for parsing stack.

const opVerticalBar

perlGroup var #

var perlGroup = map[string]charGroup{...}

posixGroup var #

var posixGroup = map[string]charGroup{...}

runeSize const #

const runeSize = 4

Type Aliases

EmptyOp type #

An EmptyOp specifies a kind or mixture of zero-width assertions.

type EmptyOp uint8

ErrorCode type #

An ErrorCode describes a failure to parse a regular expression.

type ErrorCode string

Flags type #

Flags control the behavior of the parser and record information about regexp context.

type Flags uint16

InstOp type #

An InstOp is an instruction opcode.

type InstOp uint8

Op type #

An Op is a single regular expression operator.

type Op uint8

printFlags type #

printFlags is a bit set indicating which flags (including non-capturing parens) to print around a regexp.

type printFlags uint8

Structs

Error struct #

An Error describes a failure to parse a regular expression and gives the offending expression.

type Error struct {
Code ErrorCode
Expr string
}

Inst struct #

An Inst is a single instruction in a regular expression program.

type Inst struct {
Op InstOp
Out uint32
Arg uint32
Rune []rune
}

Prog struct #

A Prog is a compiled regular expression program.

type Prog struct {
Inst []Inst
Start int
NumCap int
}

Regexp struct #

A Regexp is a node in a regular expression syntax tree.

type Regexp struct {
Op Op
Flags Flags
Sub []*Regexp
Sub0 [1]*Regexp
Rune []rune
Rune0 [2]rune
Min int
Max int
Cap int
Name string
}

charGroup struct #

type charGroup struct {
sign int
class []rune
}

compiler struct #

type compiler struct {
p *Prog
}

frag struct #

A frag represents a compiled program fragment.

type frag struct {
i uint32
out patchList
nullable bool
}

parser struct #

type parser struct {
flags Flags
stack []*Regexp
free *Regexp
numCap int
wholeRegexp string
tmpClass []rune
numRegexp int
numRunes int
repeats int64
height map[*Regexp]int
size map[*Regexp]int64
}

patchList struct #

A patchList is a list of instruction pointers that need to be filled in (patched). Because the pointers haven't been filled in yet, we can reuse their storage to hold the list. It's kind of sleazy, but works well in practice. See https://swtch.com/~rsc/regexp/regexp1.html for inspiration. These aren't really pointers: they're integers, so we can reinterpret them this way without using package unsafe. A value l.head denotes p.inst[l.head>>1].Out (l.head&1==0) or .Arg (l.head&1==1). head == 0 denotes the empty list, okay because we start every program with a fail instruction, so we'll never want to point at its output link.

type patchList struct {
head uint32
tail uint32
}

ranges struct #

ranges implements sort.Interface on a []rune. The choice of receiver type definition is strange but avoids an allocation since we already have a *[]rune.

type ranges struct {
p *[]rune
}

Functions

CapNames method #

CapNames walks the regexp to find the names of capturing groups.

func (re *Regexp) CapNames() []string

Compile function #

Compile compiles the regexp into a program to be executed. The regexp should have been simplified already (returned from re.Simplify).

func Compile(re *Regexp) (*Prog, error)

EmptyOpContext function #

EmptyOpContext returns the zero-width assertions satisfied at the position between the runes r1 and r2. Passing r1 == -1 indicates that the position is at the beginning of the text. Passing r2 == -1 indicates that the position is at the end of the text.

func EmptyOpContext(r1 rune, r2 rune) EmptyOp

Equal method #

Equal reports whether x and y have identical structure.

func (x *Regexp) Equal(y *Regexp) bool

Error method #

func (e *Error) Error() string

IsWordChar function #

IsWordChar reports whether r is considered a “word character” during the evaluation of the \b and \B zero-width assertions. These assertions are ASCII-only: the word characters are [A-Za-z0-9_].

func IsWordChar(r rune) bool

Len method #

func (ra ranges) Len() int

Less method #

func (ra ranges) Less(i int, j int) bool

MatchEmptyWidth method #

MatchEmptyWidth reports whether the instruction matches an empty string between the runes before and after. It should only be called when i.Op == [InstEmptyWidth].

func (i *Inst) MatchEmptyWidth(before rune, after rune) bool

MatchRune method #

MatchRune reports whether the instruction matches (and consumes) r. It should only be called when i.Op == [InstRune].

func (i *Inst) MatchRune(r rune) bool

MatchRunePos method #

MatchRunePos checks whether the instruction matches (and consumes) r. If so, MatchRunePos returns the index of the matching rune pair (or, when len(i.Rune) == 1, rune singleton). If not, MatchRunePos returns -1. MatchRunePos should only be called when i.Op == [InstRune].

func (i *Inst) MatchRunePos(r rune) int

MaxCap method #

MaxCap walks the regexp to find the maximum capture index.

func (re *Regexp) MaxCap() int

Parse function #

Parse parses a regular expression string s, controlled by the specified Flags, and returns a regular expression parse tree. The syntax is described in the top-level comment.

func Parse(s string, flags Flags) (*Regexp, error)

Prefix method #

Prefix returns a literal string that all matches for the regexp must start with. Complete is true if the prefix is the entire match.

func (p *Prog) Prefix() (prefix string, complete bool)

Simplify method #

Simplify returns a regexp equivalent to re but without counted repetitions and with various other simplifications, such as rewriting /(?:a+)+/ to /a+/. The resulting regexp will execute correctly but its string representation will not produce the same parse tree, because capturing parentheses may have been duplicated or removed. For example, the simplified form for /(x){1,2}/ is /(x)(x)?/ but both parentheses capture as $1. The returned regexp may share structure with or be the original.

func (re *Regexp) Simplify() *Regexp

StartCond method #

StartCond returns the leading empty-width conditions that must be true in any match. It returns ^EmptyOp(0) if no matches are possible.

func (p *Prog) StartCond() EmptyOp

String method #

func (i InstOp) String() string

String method #

func (i *Inst) String() string

String method #

func (i Op) String() string

String method #

func (re *Regexp) String() string

String method #

func (e ErrorCode) String() string

String method #

func (p *Prog) String() string

Swap method #

func (ra ranges) Swap(i int, j int)

_ function #

func _()

addSpan function #

addSpan enables the flags f around start..last, by setting flags[start] = f and flags[last] = flagOff.

func addSpan(start *Regexp, last *Regexp, f printFlags, flags *map[*Regexp]printFlags)

alt method #

func (c *compiler) alt(f1 frag, f2 frag) frag

alternate method #

alternate replaces the top of the stack (above the topmost '(') with its alternation.

func (p *parser) alternate() *Regexp

append method #

func (l1 patchList) append(p *Prog, l2 patchList) patchList

appendClass function #

appendClass returns the result of appending the class x to the class r. It assume x is clean.

func appendClass(r []rune, x []rune) []rune

appendFoldedClass function #

appendFoldedClass returns the result of appending the case folding of the class x to the class r.

func appendFoldedClass(r []rune, x []rune) []rune

appendFoldedRange function #

appendFoldedRange returns the result of appending the range lo-hi and its case folding-equivalent runes to the class r.

func appendFoldedRange(r []rune, lo rune, hi rune) []rune

appendGroup method #

func (p *parser) appendGroup(r []rune, g charGroup) []rune

appendLiteral function #

appendLiteral returns the result of appending the literal x to the class r.

func appendLiteral(r []rune, x rune, flags Flags) []rune

appendNegatedClass function #

appendNegatedClass returns the result of appending the negation of the class x to the class r. It assumes x is clean.

func appendNegatedClass(r []rune, x []rune) []rune

appendNegatedTable function #

appendNegatedTable returns the result of appending the negation of x to the class r.

func appendNegatedTable(r []rune, x *unicode.RangeTable) []rune

appendRange function #

appendRange returns the result of appending the range lo-hi to the class r.

func appendRange(r []rune, lo rune, hi rune) []rune

appendTable function #

appendTable returns the result of appending x to the class r.

func appendTable(r []rune, x *unicode.RangeTable) []rune

bw function #

func bw(b *strings.Builder, args ...string)

calcFlags function #

calcFlags calculates the flags to print around each subexpression in re, storing that information in (*flags)[sub] for each affected subexpression. The first time an entry needs to be written to *flags, calcFlags allocates the map. calcFlags also calculates the flags that must be active or can't be active around re and returns those flags.

func calcFlags(re *Regexp, flags *map[*Regexp]printFlags) (must printFlags, cant printFlags)

calcHeight method #

func (p *parser) calcHeight(re *Regexp, force bool) int

calcSize method #

func (p *parser) calcSize(re *Regexp, force bool) int64

cap method #

func (c *compiler) cap(arg uint32) frag

capNames method #

func (re *Regexp) capNames(names []string)

cat method #

func (c *compiler) cat(f1 frag, f2 frag) frag

checkHeight method #

func (p *parser) checkHeight(re *Regexp)

checkLimits method #

func (p *parser) checkLimits(re *Regexp)

checkSize method #

func (p *parser) checkSize(re *Regexp)

checkUTF8 function #

func checkUTF8(s string) error

cleanAlt function #

cleanAlt cleans re for eventual inclusion in an alternation.

func cleanAlt(re *Regexp)

cleanClass function #

cleanClass sorts the ranges (pairs of elements of r), merges them, and eliminates duplicates.

func cleanClass(rp *[]rune) []rune

collapse method #

collapse returns the result of applying op to sub. If sub contains op nodes, they all get hoisted up so that there is never a concat of a concat or an alternate of an alternate.

func (p *parser) collapse(subs []*Regexp, op Op) *Regexp

compile method #

func (c *compiler) compile(re *Regexp) frag

concat method #

concat replaces the top of the stack (above the topmost '|' or '(') with its concatenation.

func (p *parser) concat() *Regexp

dumpInst function #

func dumpInst(b *strings.Builder, i *Inst)

dumpProg function #

func dumpProg(b *strings.Builder, p *Prog)

empty method #

func (c *compiler) empty(op EmptyOp) frag

escape function #

func escape(b *strings.Builder, r rune, force bool)

factor method #

factor factors common prefixes from the alternation list sub. It returns a replacement list that reuses the same storage and frees (passes to p.reuse) any removed *Regexps. For example, ABC|ABD|AEF|BCX|BCY simplifies by literal prefix extraction to A(B(C|D)|EF)|BC(X|Y) which simplifies by character class introduction to A(B[CD]|EF)|BC[XY]

func (p *parser) factor(sub []*Regexp) []*Regexp

fail method #

func (c *compiler) fail() frag

inCharClass function #

inCharClass reports whether r is in the class. It assumes the class has been cleaned by cleanClass.

func inCharClass(r rune, class []rune) bool

init method #

func (c *compiler) init()

inst method #

func (c *compiler) inst(op InstOp) frag

isCharClass function #

can this be represented as a character class? single-rune literal string, char class, ., and .|\n.

func isCharClass(re *Regexp) bool

isValidCaptureName function #

isValidCaptureName reports whether name is a valid capture name: [A-Za-z0-9_]+. PCRE limits names to 32 bytes. Python rejects names starting with digits. We don't enforce either of those.

func isValidCaptureName(name string) bool

isalnum function #

func isalnum(c rune) bool

leadingRegexp method #

leadingRegexp returns the leading regexp that re begins with. The regexp refers to storage in re or its children.

func (p *parser) leadingRegexp(re *Regexp) *Regexp

leadingString method #

leadingString returns the leading literal string that re begins with. The string refers to storage in re or its children.

func (p *parser) leadingString(re *Regexp) ([]rune, Flags)

literal method #

literal pushes a literal regexp for the rune r on the stack.

func (p *parser) literal(r rune)

literalRegexp function #

func literalRegexp(s string, flags Flags) *Regexp

loop method #

loop returns the fragment for the main loop of a plus or star. For plus, it can be used after changing the entry to f1.i. For star, it can be used directly when f1 can't match an empty string. (When f1 can match an empty string, f1* must be implemented as (f1+)? to get the priority match order correct.)

func (c *compiler) loop(f1 frag, nongreedy bool) frag

makePatchList function #

func makePatchList(n uint32) patchList

matchRune function #

does re match r?

func matchRune(re *Regexp, r rune) bool

maybeConcat method #

maybeConcat implements incremental concatenation of literal runes into string nodes. The parser calls this before each push, so only the top fragment of the stack might need processing. Since this is called before a push, the topmost literal is no longer subject to operators like * (Otherwise ab* would turn into (ab)*.) If r >= 0 and there's a node left over, maybeConcat uses it to push r with the given flags. maybeConcat reports whether r was pushed.

func (p *parser) maybeConcat(r rune, flags Flags) bool

mergeCharClass function #

mergeCharClass makes dst = dst|src. The caller must ensure that dst.Op >= src.Op, to reduce the amount of copying.

func mergeCharClass(dst *Regexp, src *Regexp)

minFoldRune function #

minFoldRune returns the minimum rune fold-equivalent to r.

func minFoldRune(r rune) rune

negateClass function #

negateClass overwrites r and returns r's negation. It assumes the class r is already clean.

func negateClass(r []rune) []rune

newRegexp method #

func (p *parser) newRegexp(op Op) *Regexp

nextRune function #

func nextRune(s string) (c rune, t string, err error)

nop method #

func (c *compiler) nop() frag

op method #

op pushes a regexp with the given op onto the stack and returns that regexp.

func (p *parser) op(op Op) *Regexp

op method #

op returns i.Op but merges all the Rune special cases into InstRune

func (i *Inst) op() InstOp

parse function #

func parse(s string, flags Flags) (_ *Regexp, err error)

parseClass method #

parseClass parses a character class at the beginning of s and pushes it onto the parse stack.

func (p *parser) parseClass(s string) (rest string, err error)

parseClassChar method #

parseClassChar parses a character class character at the beginning of s and returns it.

func (p *parser) parseClassChar(s string, wholeClass string) (r rune, rest string, err error)

parseEscape method #

parseEscape parses an escape sequence at the beginning of s and returns the rune.

func (p *parser) parseEscape(s string) (r rune, rest string, err error)

parseInt method #

parseInt parses a decimal integer.

func (p *parser) parseInt(s string) (n int, rest string, ok bool)

parseNamedClass method #

parseNamedClass parses a leading POSIX named character class like [:alnum:] from the beginning of s. If one is present, it appends the characters to r and returns the new slice r and the remainder of the string.

func (p *parser) parseNamedClass(s string, r []rune) (out []rune, rest string, err error)

parsePerlClassEscape method #

parsePerlClassEscape parses a leading Perl character class escape like \d from the beginning of s. If one is present, it appends the characters to r and returns the new slice r and the remainder of the string.

func (p *parser) parsePerlClassEscape(s string, r []rune) (out []rune, rest string)

parsePerlFlags method #

parsePerlFlags parses a Perl flag setting or non-capturing group or both, like (?i) or (?: or (?i:. It removes the prefix from s and updates the parse state. The caller must have ensured that s begins with "(?".

func (p *parser) parsePerlFlags(s string) (rest string, err error)

parseRepeat method #

parseRepeat parses {min} (max=min) or {min,} (max=-1) or {min,max}. If s is not of that form, it returns ok == false. If s has the right form but the values are too big, it returns min == -1, ok == true.

func (p *parser) parseRepeat(s string) (min int, max int, rest string, ok bool)

parseRightParen method #

parseRightParen handles a ) in the input.

func (p *parser) parseRightParen() error

parseUnicodeClass method #

parseUnicodeClass parses a leading Unicode character class like \p{Han} from the beginning of s. If one is present, it appends the characters to r and returns the new slice r and the remainder of the string.

func (p *parser) parseUnicodeClass(s string, r []rune) (out []rune, rest string, err error)

parseVerticalBar method #

parseVerticalBar handles a | in the input.

func (p *parser) parseVerticalBar()

patch method #

func (l patchList) patch(p *Prog, val uint32)

plus method #

func (c *compiler) plus(f1 frag, nongreedy bool) frag

push method #

push pushes the regexp re onto the parse stack and returns the regexp.

func (p *parser) push(re *Regexp) *Regexp

quest method #

func (c *compiler) quest(f1 frag, nongreedy bool) frag

removeLeadingRegexp method #

removeLeadingRegexp removes the leading regexp in re. It returns the replacement for re. If reuse is true, it passes the removed regexp (if no longer needed) to p.reuse.

func (p *parser) removeLeadingRegexp(re *Regexp, reuse bool) *Regexp

removeLeadingString method #

removeLeadingString removes the first n leading runes from the beginning of re. It returns the replacement for re.

func (p *parser) removeLeadingString(re *Regexp, n int) *Regexp

repeat method #

repeat replaces the top stack element with itself repeated according to op, min, max. before is the regexp suffix starting at the repetition operator. after is the regexp suffix following after the repetition operator. repeat returns an updated 'after' and an error, if any.

func (p *parser) repeat(op Op, min int, max int, before string, after string, lastRepeat string) (string, error)

repeatIsValid function #

repeatIsValid reports whether the repetition re is valid. Valid means that the combination of the top-level repetition and any inner repetitions does not exceed n copies of the innermost thing. This function rewalks the regexp tree and is called for every repetition, so we have to worry about inducing quadratic behavior in the parser. We avoid this by only calling repeatIsValid when min or max >= 2. In that case the depth of any >= 2 nesting can only get to 9 without triggering a parse error, so each subtree can only be rewalked 9 times.

func repeatIsValid(re *Regexp, n int) bool

reuse method #

func (p *parser) reuse(re *Regexp)

rune method #

func (c *compiler) rune(r []rune, flags Flags) frag

simplify1 function #

simplify1 implements Simplify for the unary OpStar, OpPlus, and OpQuest operators. It returns the simple regexp equivalent to Regexp{Op: op, Flags: flags, Sub: {sub}} under the assumption that sub is already simple, and without first allocating that structure. If the regexp to be returned turns out to be equivalent to re, simplify1 returns re instead. simplify1 is factored out of Simplify because the implementation for other operators generates these unary expressions. Letting them call simplify1 makes sure the expressions they generate are simple.

func simplify1(op Op, flags Flags, sub *Regexp, re *Regexp) *Regexp

skipNop method #

skipNop follows any no-op or capturing instructions.

func (p *Prog) skipNop(pc uint32) *Inst

star method #

func (c *compiler) star(f1 frag, nongreedy bool) frag

swapVerticalBar method #

If the top of the stack is an element followed by an opVerticalBar swapVerticalBar swaps the two and returns true. Otherwise it returns false.

func (p *parser) swapVerticalBar() bool

u32 function #

func u32(i uint32) string

unhex function #

func unhex(c rune) rune

unicodeTable function #

unicodeTable returns the unicode.RangeTable identified by name and the table of additional fold-equivalent code points.

func unicodeTable(name string) (*unicode.RangeTable, *unicode.RangeTable)

writeRegexp function #

writeRegexp writes the Perl syntax for the regular expression re to b.

func writeRegexp(b *strings.Builder, re *Regexp, f printFlags, flags map[*Regexp]printFlags)

Generated with Arrow