/regexp/regexp.go

https://code.google.com/p/appengine-go-backports/ · Go · 905 lines · 563 code · 90 blank · 252 comment · 130 complexity · 78fcbd094d1a0cd0f4937ae0c6400aee MD5 · raw file

  1. // Use of this source code is governed by a BSD-style
  2. // license that can be found in the LICENSE file.
  3. // Package regexp implements a simple regular expression library.
  4. //
  5. // The syntax of the regular expressions accepted is the same
  6. // general syntax used by Perl, Python, and other languages.
  7. // More precisely, it is the syntax accepted by RE2 and described at
  8. // http://code.google.com/p/re2/wiki/Syntax, except for \C.
  9. //
  10. // All characters are UTF-8-encoded code points.
  11. //
  12. // There are 16 methods of Regexp that match a regular expression and identify
  13. // the matched text. Their names are matched by this regular expression:
  14. //
  15. // Find(All)?(String)?(Submatch)?(Index)?
  16. //
  17. // If 'All' is present, the routine matches successive non-overlapping
  18. // matches of the entire expression. Empty matches abutting a preceding
  19. // match are ignored. The return value is a slice containing the successive
  20. // return values of the corresponding non-'All' routine. These routines take
  21. // an extra integer argument, n; if n >= 0, the function returns at most n
  22. // matches/submatches.
  23. //
  24. // If 'String' is present, the argument is a string; otherwise it is a slice
  25. // of bytes; return values are adjusted as appropriate.
  26. //
  27. // If 'Submatch' is present, the return value is a slice identifying the
  28. // successive submatches of the expression. Submatches are matches of
  29. // parenthesized subexpressions within the regular expression, numbered from
  30. // left to right in order of opening parenthesis. Submatch 0 is the match of
  31. // the entire expression, submatch 1 the match of the first parenthesized
  32. // subexpression, and so on.
  33. //
  34. // If 'Index' is present, matches and submatches are identified by byte index
  35. // pairs within the input string: result[2*n:2*n+2] identifies the indexes of
  36. // the nth submatch. The pair for n==0 identifies the match of the entire
  37. // expression. If 'Index' is not present, the match is identified by the
  38. // text of the match/submatch. If an index is negative, it means that
  39. // subexpression did not match any string in the input.
  40. //
  41. // There is also a subset of the methods that can be applied to text read
  42. // from a RuneReader:
  43. //
  44. // MatchReader, FindReaderIndex, FindReaderSubmatchIndex
  45. //
  46. // This set may grow. Note that regular expression matches may need to
  47. // examine text beyond the text returned by a match, so the methods that
  48. // match text from a RuneReader may read arbitrarily far into the input
  49. // before returning.
  50. //
  51. // (There are a few other methods that do not match this pattern.)
  52. //
  53. package regexp
  54. import (
  55. "bytes"
  56. "io"
  57. "os"
  58. "appengine-go-backports/regexp/syntax"
  59. "strconv"
  60. "strings"
  61. "sync"
  62. "utf8"
  63. )
  64. var debug = false
  65. // Error is the local type for a parsing error.
  66. type Error string
  67. func (e Error) String() string {
  68. return string(e)
  69. }
  70. // Regexp is the representation of a compiled regular expression.
  71. // The public interface is entirely through methods.
  72. // A Regexp is safe for concurrent use by multiple goroutines.
  73. type Regexp struct {
  74. // read-only after Compile
  75. expr string // as passed to Compile
  76. prog *syntax.Prog // compiled program
  77. prefix string // required prefix in unanchored matches
  78. prefixBytes []byte // prefix, as a []byte
  79. prefixComplete bool // prefix is the entire regexp
  80. prefixRune int // first rune in prefix
  81. cond syntax.EmptyOp // empty-width conditions required at start of match
  82. numSubexp int
  83. longest bool
  84. // cache of machines for running regexp
  85. mu sync.Mutex
  86. machine []*machine
  87. }
  88. // String returns the source text used to compile the regular expression.
  89. func (re *Regexp) String() string {
  90. return re.expr
  91. }
  92. // Compile parses a regular expression and returns, if successful,
  93. // a Regexp object that can be used to match against text.
  94. //
  95. // When matching against text, the regexp returns a match that
  96. // begins as early as possible in the input (leftmost), and among those
  97. // it chooses the one that a backtracking search would have found first.
  98. // This so-called leftmost-first matching is the same semantics
  99. // that Perl, Python, and other implementations use, although this
  100. // package implements it without the expense of backtracking.
  101. // For POSIX leftmost-longest matching, see CompilePOSIX.
  102. func Compile(expr string) (*Regexp, os.Error) {
  103. return compile(expr, syntax.Perl, false)
  104. }
  105. // CompilePOSIX is like Compile but restricts the regular expression
  106. // to POSIX ERE (egrep) syntax and changes the match semantics to
  107. // leftmost-longest.
  108. //
  109. // That is, when matching against text, the regexp returns a match that
  110. // begins as early as possible in the input (leftmost), and among those
  111. // it chooses a match that is as long as possible.
  112. // This so-called leftmost-longest matching is the same semantics
  113. // that early regular expression implementations used and that POSIX
  114. // specifies.
  115. //
  116. // However, there can be multiple leftmost-longest matches, with different
  117. // submatch choices, and here this package diverges from POSIX.
  118. // Among the possible leftmost-longest matches, this package chooses
  119. // the one that a backtracking search would have found first, while POSIX
  120. // specifies that the match be chosen to maximize the length of the first
  121. // subexpression, then the second, and so on from left to right.
  122. // The POSIX rule is computationally prohibitive and not even well-defined.
  123. // See http://swtch.com/~rsc/regexp/regexp2.html#posix for details.
  124. func CompilePOSIX(expr string) (*Regexp, os.Error) {
  125. return compile(expr, syntax.POSIX, true)
  126. }
  127. func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, os.Error) {
  128. re, err := syntax.Parse(expr, mode)
  129. if err != nil {
  130. return nil, err
  131. }
  132. maxCap := re.MaxCap()
  133. re = re.Simplify()
  134. prog, err := syntax.Compile(re)
  135. if err != nil {
  136. return nil, err
  137. }
  138. regexp := &Regexp{
  139. expr: expr,
  140. prog: prog,
  141. numSubexp: maxCap,
  142. cond: prog.StartCond(),
  143. longest: longest,
  144. }
  145. regexp.prefix, regexp.prefixComplete = prog.Prefix()
  146. if regexp.prefix != "" {
  147. // TODO(rsc): Remove this allocation by adding
  148. // IndexString to package bytes.
  149. regexp.prefixBytes = []byte(regexp.prefix)
  150. regexp.prefixRune, _ = utf8.DecodeRuneInString(regexp.prefix)
  151. }
  152. return regexp, nil
  153. }
  154. // get returns a machine to use for matching re.
  155. // It uses the re's machine cache if possible, to avoid
  156. // unnecessary allocation.
  157. func (re *Regexp) get() *machine {
  158. re.mu.Lock()
  159. if n := len(re.machine); n > 0 {
  160. z := re.machine[n-1]
  161. re.machine = re.machine[:n-1]
  162. re.mu.Unlock()
  163. return z
  164. }
  165. re.mu.Unlock()
  166. z := progMachine(re.prog)
  167. z.re = re
  168. return z
  169. }
  170. // put returns a machine to the re's machine cache.
  171. // There is no attempt to limit the size of the cache, so it will
  172. // grow to the maximum number of simultaneous matches
  173. // run using re. (The cache empties when re gets garbage collected.)
  174. func (re *Regexp) put(z *machine) {
  175. re.mu.Lock()
  176. re.machine = append(re.machine, z)
  177. re.mu.Unlock()
  178. }
  179. // MustCompile is like Compile but panics if the expression cannot be parsed.
  180. // It simplifies safe initialization of global variables holding compiled regular
  181. // expressions.
  182. func MustCompile(str string) *Regexp {
  183. regexp, error := Compile(str)
  184. if error != nil {
  185. panic(`regexp: Compile(` + quote(str) + `): ` + error.String())
  186. }
  187. return regexp
  188. }
  189. // MustCompilePOSIX is like CompilePOSIX but panics if the expression cannot be parsed.
  190. // It simplifies safe initialization of global variables holding compiled regular
  191. // expressions.
  192. func MustCompilePOSIX(str string) *Regexp {
  193. regexp, error := CompilePOSIX(str)
  194. if error != nil {
  195. panic(`regexp: CompilePOSIX(` + quote(str) + `): ` + error.String())
  196. }
  197. return regexp
  198. }
  199. func quote(s string) string {
  200. if strconv.CanBackquote(s) {
  201. return "`" + s + "`"
  202. }
  203. return strconv.Quote(s)
  204. }
  205. // NumSubexp returns the number of parenthesized subexpressions in this Regexp.
  206. func (re *Regexp) NumSubexp() int {
  207. return re.numSubexp
  208. }
  209. const endOfText = -1
  210. // input abstracts different representations of the input text. It provides
  211. // one-character lookahead.
  212. type input interface {
  213. step(pos int) (rune int, width int) // advance one rune
  214. canCheckPrefix() bool // can we look ahead without losing info?
  215. hasPrefix(re *Regexp) bool
  216. index(re *Regexp, pos int) int
  217. context(pos int) syntax.EmptyOp
  218. }
  219. // inputString scans a string.
  220. type inputString struct {
  221. str string
  222. }
  223. func newInputString(str string) *inputString {
  224. return &inputString{str: str}
  225. }
  226. func (i *inputString) step(pos int) (int, int) {
  227. if pos < len(i.str) {
  228. c := i.str[pos]
  229. if c < utf8.RuneSelf {
  230. return int(c), 1
  231. }
  232. return utf8.DecodeRuneInString(i.str[pos:])
  233. }
  234. return endOfText, 0
  235. }
  236. func (i *inputString) canCheckPrefix() bool {
  237. return true
  238. }
  239. func (i *inputString) hasPrefix(re *Regexp) bool {
  240. return strings.HasPrefix(i.str, re.prefix)
  241. }
  242. func (i *inputString) index(re *Regexp, pos int) int {
  243. return strings.Index(i.str[pos:], re.prefix)
  244. }
  245. func (i *inputString) context(pos int) syntax.EmptyOp {
  246. r1, r2 := -1, -1
  247. if pos > 0 && pos <= len(i.str) {
  248. r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
  249. }
  250. if pos < len(i.str) {
  251. r2, _ = utf8.DecodeRuneInString(i.str[pos:])
  252. }
  253. return syntax.EmptyOpContext(r1, r2)
  254. }
  255. // inputBytes scans a byte slice.
  256. type inputBytes struct {
  257. str []byte
  258. }
  259. func newInputBytes(str []byte) *inputBytes {
  260. return &inputBytes{str: str}
  261. }
  262. func (i *inputBytes) step(pos int) (int, int) {
  263. if pos < len(i.str) {
  264. c := i.str[pos]
  265. if c < utf8.RuneSelf {
  266. return int(c), 1
  267. }
  268. return utf8.DecodeRune(i.str[pos:])
  269. }
  270. return endOfText, 0
  271. }
  272. func (i *inputBytes) canCheckPrefix() bool {
  273. return true
  274. }
  275. func (i *inputBytes) hasPrefix(re *Regexp) bool {
  276. return bytes.HasPrefix(i.str, re.prefixBytes)
  277. }
  278. func (i *inputBytes) index(re *Regexp, pos int) int {
  279. return bytes.Index(i.str[pos:], re.prefixBytes)
  280. }
  281. func (i *inputBytes) context(pos int) syntax.EmptyOp {
  282. r1, r2 := -1, -1
  283. if pos > 0 && pos <= len(i.str) {
  284. r1, _ = utf8.DecodeLastRune(i.str[:pos])
  285. }
  286. if pos < len(i.str) {
  287. r2, _ = utf8.DecodeRune(i.str[pos:])
  288. }
  289. return syntax.EmptyOpContext(r1, r2)
  290. }
  291. // inputReader scans a RuneReader.
  292. type inputReader struct {
  293. r io.RuneReader
  294. atEOT bool
  295. pos int
  296. }
  297. func newInputReader(r io.RuneReader) *inputReader {
  298. return &inputReader{r: r}
  299. }
  300. func (i *inputReader) step(pos int) (int, int) {
  301. if !i.atEOT && pos != i.pos {
  302. return endOfText, 0
  303. }
  304. r, w, err := i.r.ReadRune()
  305. if err != nil {
  306. i.atEOT = true
  307. return endOfText, 0
  308. }
  309. i.pos += w
  310. return r, w
  311. }
  312. func (i *inputReader) canCheckPrefix() bool {
  313. return false
  314. }
  315. func (i *inputReader) hasPrefix(re *Regexp) bool {
  316. return false
  317. }
  318. func (i *inputReader) index(re *Regexp, pos int) int {
  319. return -1
  320. }
  321. func (i *inputReader) context(pos int) syntax.EmptyOp {
  322. return 0
  323. }
  324. // LiteralPrefix returns a literal string that must begin any match
  325. // of the regular expression re. It returns the boolean true if the
  326. // literal string comprises the entire regular expression.
  327. func (re *Regexp) LiteralPrefix() (prefix string, complete bool) {
  328. return re.prefix, re.prefixComplete
  329. }
  330. // MatchReader returns whether the Regexp matches the text read by the
  331. // RuneReader. The return value is a boolean: true for match, false for no
  332. // match.
  333. func (re *Regexp) MatchReader(r io.RuneReader) bool {
  334. return re.doExecute(newInputReader(r), 0, 0) != nil
  335. }
  336. // MatchString returns whether the Regexp matches the string s.
  337. // The return value is a boolean: true for match, false for no match.
  338. func (re *Regexp) MatchString(s string) bool {
  339. return re.doExecute(newInputString(s), 0, 0) != nil
  340. }
  341. // Match returns whether the Regexp matches the byte slice b.
  342. // The return value is a boolean: true for match, false for no match.
  343. func (re *Regexp) Match(b []byte) bool {
  344. return re.doExecute(newInputBytes(b), 0, 0) != nil
  345. }
  346. // MatchReader checks whether a textual regular expression matches the text
  347. // read by the RuneReader. More complicated queries need to use Compile and
  348. // the full Regexp interface.
  349. func MatchReader(pattern string, r io.RuneReader) (matched bool, error os.Error) {
  350. re, err := Compile(pattern)
  351. if err != nil {
  352. return false, err
  353. }
  354. return re.MatchReader(r), nil
  355. }
  356. // MatchString checks whether a textual regular expression
  357. // matches a string. More complicated queries need
  358. // to use Compile and the full Regexp interface.
  359. func MatchString(pattern string, s string) (matched bool, error os.Error) {
  360. re, err := Compile(pattern)
  361. if err != nil {
  362. return false, err
  363. }
  364. return re.MatchString(s), nil
  365. }
  366. // Match checks whether a textual regular expression
  367. // matches a byte slice. More complicated queries need
  368. // to use Compile and the full Regexp interface.
  369. func Match(pattern string, b []byte) (matched bool, error os.Error) {
  370. re, err := Compile(pattern)
  371. if err != nil {
  372. return false, err
  373. }
  374. return re.Match(b), nil
  375. }
  376. // ReplaceAllString returns a copy of src in which all matches for the Regexp
  377. // have been replaced by repl. No support is provided for expressions
  378. // (e.g. \1 or $1) in the replacement string.
  379. func (re *Regexp) ReplaceAllString(src, repl string) string {
  380. return re.ReplaceAllStringFunc(src, func(string) string { return repl })
  381. }
  382. // ReplaceAllStringFunc returns a copy of src in which all matches for the
  383. // Regexp have been replaced by the return value of of function repl (whose
  384. // first argument is the matched string). No support is provided for
  385. // expressions (e.g. \1 or $1) in the replacement string.
  386. func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string {
  387. lastMatchEnd := 0 // end position of the most recent match
  388. searchPos := 0 // position where we next look for a match
  389. buf := new(bytes.Buffer)
  390. for searchPos <= len(src) {
  391. a := re.doExecute(newInputString(src), searchPos, 2)
  392. if len(a) == 0 {
  393. break // no more matches
  394. }
  395. // Copy the unmatched characters before this match.
  396. io.WriteString(buf, src[lastMatchEnd:a[0]])
  397. // Now insert a copy of the replacement string, but not for a
  398. // match of the empty string immediately after another match.
  399. // (Otherwise, we get double replacement for patterns that
  400. // match both empty and nonempty strings.)
  401. if a[1] > lastMatchEnd || a[0] == 0 {
  402. io.WriteString(buf, repl(src[a[0]:a[1]]))
  403. }
  404. lastMatchEnd = a[1]
  405. // Advance past this match; always advance at least one character.
  406. _, width := utf8.DecodeRuneInString(src[searchPos:])
  407. if searchPos+width > a[1] {
  408. searchPos += width
  409. } else if searchPos+1 > a[1] {
  410. // This clause is only needed at the end of the input
  411. // string. In that case, DecodeRuneInString returns width=0.
  412. searchPos++
  413. } else {
  414. searchPos = a[1]
  415. }
  416. }
  417. // Copy the unmatched characters after the last match.
  418. io.WriteString(buf, src[lastMatchEnd:])
  419. return buf.String()
  420. }
  421. // ReplaceAll returns a copy of src in which all matches for the Regexp
  422. // have been replaced by repl. No support is provided for expressions
  423. // (e.g. \1 or $1) in the replacement text.
  424. func (re *Regexp) ReplaceAll(src, repl []byte) []byte {
  425. return re.ReplaceAllFunc(src, func([]byte) []byte { return repl })
  426. }
  427. // ReplaceAllFunc returns a copy of src in which all matches for the
  428. // Regexp have been replaced by the return value of of function repl (whose
  429. // first argument is the matched []byte). No support is provided for
  430. // expressions (e.g. \1 or $1) in the replacement string.
  431. func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte {
  432. lastMatchEnd := 0 // end position of the most recent match
  433. searchPos := 0 // position where we next look for a match
  434. buf := new(bytes.Buffer)
  435. for searchPos <= len(src) {
  436. a := re.doExecute(newInputBytes(src), searchPos, 2)
  437. if len(a) == 0 {
  438. break // no more matches
  439. }
  440. // Copy the unmatched characters before this match.
  441. buf.Write(src[lastMatchEnd:a[0]])
  442. // Now insert a copy of the replacement string, but not for a
  443. // match of the empty string immediately after another match.
  444. // (Otherwise, we get double replacement for patterns that
  445. // match both empty and nonempty strings.)
  446. if a[1] > lastMatchEnd || a[0] == 0 {
  447. buf.Write(repl(src[a[0]:a[1]]))
  448. }
  449. lastMatchEnd = a[1]
  450. // Advance past this match; always advance at least one character.
  451. _, width := utf8.DecodeRune(src[searchPos:])
  452. if searchPos+width > a[1] {
  453. searchPos += width
  454. } else if searchPos+1 > a[1] {
  455. // This clause is only needed at the end of the input
  456. // string. In that case, DecodeRuneInString returns width=0.
  457. searchPos++
  458. } else {
  459. searchPos = a[1]
  460. }
  461. }
  462. // Copy the unmatched characters after the last match.
  463. buf.Write(src[lastMatchEnd:])
  464. return buf.Bytes()
  465. }
  466. var specialBytes = []byte(`\.+*?()|[]{}^$`)
  467. func special(b byte) bool {
  468. return bytes.IndexByte(specialBytes, b) >= 0
  469. }
  470. // QuoteMeta returns a string that quotes all regular expression metacharacters
  471. // inside the argument text; the returned string is a regular expression matching
  472. // the literal text. For example, QuoteMeta(`[foo]`) returns `\[foo\]`.
  473. func QuoteMeta(s string) string {
  474. b := make([]byte, 2*len(s))
  475. // A byte loop is correct because all metacharacters are ASCII.
  476. j := 0
  477. for i := 0; i < len(s); i++ {
  478. if special(s[i]) {
  479. b[j] = '\\'
  480. j++
  481. }
  482. b[j] = s[i]
  483. j++
  484. }
  485. return string(b[0:j])
  486. }
  487. // The number of capture values in the program may correspond
  488. // to fewer capturing expressions than are in the regexp.
  489. // For example, "(a){0}" turns into an empty program, so the
  490. // maximum capture in the program is 0 but we need to return
  491. // an expression for \1. Pad appends -1s to the slice a as needed.
  492. func (re *Regexp) pad(a []int) []int {
  493. if a == nil {
  494. // No match.
  495. return nil
  496. }
  497. n := (1 + re.numSubexp) * 2
  498. for len(a) < n {
  499. a = append(a, -1)
  500. }
  501. return a
  502. }
  503. // Find matches in slice b if b is non-nil, otherwise find matches in string s.
  504. func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) {
  505. var end int
  506. if b == nil {
  507. end = len(s)
  508. } else {
  509. end = len(b)
  510. }
  511. for pos, i, prevMatchEnd := 0, 0, -1; i < n && pos <= end; {
  512. var in input
  513. if b == nil {
  514. in = newInputString(s)
  515. } else {
  516. in = newInputBytes(b)
  517. }
  518. matches := re.doExecute(in, pos, re.prog.NumCap)
  519. if len(matches) == 0 {
  520. break
  521. }
  522. accept := true
  523. if matches[1] == pos {
  524. // We've found an empty match.
  525. if matches[0] == prevMatchEnd {
  526. // We don't allow an empty match right
  527. // after a previous match, so ignore it.
  528. accept = false
  529. }
  530. var width int
  531. // TODO: use step()
  532. if b == nil {
  533. _, width = utf8.DecodeRuneInString(s[pos:end])
  534. } else {
  535. _, width = utf8.DecodeRune(b[pos:end])
  536. }
  537. if width > 0 {
  538. pos += width
  539. } else {
  540. pos = end + 1
  541. }
  542. } else {
  543. pos = matches[1]
  544. }
  545. prevMatchEnd = matches[1]
  546. if accept {
  547. deliver(re.pad(matches))
  548. i++
  549. }
  550. }
  551. }
  552. // Find returns a slice holding the text of the leftmost match in b of the regular expression.
  553. // A return value of nil indicates no match.
  554. func (re *Regexp) Find(b []byte) []byte {
  555. a := re.doExecute(newInputBytes(b), 0, 2)
  556. if a == nil {
  557. return nil
  558. }
  559. return b[a[0]:a[1]]
  560. }
  561. // FindIndex returns a two-element slice of integers defining the location of
  562. // the leftmost match in b of the regular expression. The match itself is at
  563. // b[loc[0]:loc[1]].
  564. // A return value of nil indicates no match.
  565. func (re *Regexp) FindIndex(b []byte) (loc []int) {
  566. a := re.doExecute(newInputBytes(b), 0, 2)
  567. if a == nil {
  568. return nil
  569. }
  570. return a[0:2]
  571. }
  572. // FindString returns a string holding the text of the leftmost match in s of the regular
  573. // expression. If there is no match, the return value is an empty string,
  574. // but it will also be empty if the regular expression successfully matches
  575. // an empty string. Use FindStringIndex or FindStringSubmatch if it is
  576. // necessary to distinguish these cases.
  577. func (re *Regexp) FindString(s string) string {
  578. a := re.doExecute(newInputString(s), 0, 2)
  579. if a == nil {
  580. return ""
  581. }
  582. return s[a[0]:a[1]]
  583. }
  584. // FindStringIndex returns a two-element slice of integers defining the
  585. // location of the leftmost match in s of the regular expression. The match
  586. // itself is at s[loc[0]:loc[1]].
  587. // A return value of nil indicates no match.
  588. func (re *Regexp) FindStringIndex(s string) []int {
  589. a := re.doExecute(newInputString(s), 0, 2)
  590. if a == nil {
  591. return nil
  592. }
  593. return a[0:2]
  594. }
  595. // FindReaderIndex returns a two-element slice of integers defining the
  596. // location of the leftmost match of the regular expression in text read from
  597. // the RuneReader. The match itself is at s[loc[0]:loc[1]]. A return
  598. // value of nil indicates no match.
  599. func (re *Regexp) FindReaderIndex(r io.RuneReader) []int {
  600. a := re.doExecute(newInputReader(r), 0, 2)
  601. if a == nil {
  602. return nil
  603. }
  604. return a[0:2]
  605. }
  606. // FindSubmatch returns a slice of slices holding the text of the leftmost
  607. // match of the regular expression in b and the matches, if any, of its
  608. // subexpressions, as defined by the 'Submatch' descriptions in the package
  609. // comment.
  610. // A return value of nil indicates no match.
  611. func (re *Regexp) FindSubmatch(b []byte) [][]byte {
  612. a := re.doExecute(newInputBytes(b), 0, re.prog.NumCap)
  613. if a == nil {
  614. return nil
  615. }
  616. ret := make([][]byte, 1+re.numSubexp)
  617. for i := range ret {
  618. if 2*i < len(a) && a[2*i] >= 0 {
  619. ret[i] = b[a[2*i]:a[2*i+1]]
  620. }
  621. }
  622. return ret
  623. }
  624. // FindSubmatchIndex returns a slice holding the index pairs identifying the
  625. // leftmost match of the regular expression in b and the matches, if any, of
  626. // its subexpressions, as defined by the 'Submatch' and 'Index' descriptions
  627. // in the package comment.
  628. // A return value of nil indicates no match.
  629. func (re *Regexp) FindSubmatchIndex(b []byte) []int {
  630. return re.pad(re.doExecute(newInputBytes(b), 0, re.prog.NumCap))
  631. }
  632. // FindStringSubmatch returns a slice of strings holding the text of the
  633. // leftmost match of the regular expression in s and the matches, if any, of
  634. // its subexpressions, as defined by the 'Submatch' description in the
  635. // package comment.
  636. // A return value of nil indicates no match.
  637. func (re *Regexp) FindStringSubmatch(s string) []string {
  638. a := re.doExecute(newInputString(s), 0, re.prog.NumCap)
  639. if a == nil {
  640. return nil
  641. }
  642. ret := make([]string, 1+re.numSubexp)
  643. for i := range ret {
  644. if 2*i < len(a) && a[2*i] >= 0 {
  645. ret[i] = s[a[2*i]:a[2*i+1]]
  646. }
  647. }
  648. return ret
  649. }
  650. // FindStringSubmatchIndex returns a slice holding the index pairs
  651. // identifying the leftmost match of the regular expression in s and the
  652. // matches, if any, of its subexpressions, as defined by the 'Submatch' and
  653. // 'Index' descriptions in the package comment.
  654. // A return value of nil indicates no match.
  655. func (re *Regexp) FindStringSubmatchIndex(s string) []int {
  656. return re.pad(re.doExecute(newInputString(s), 0, re.prog.NumCap))
  657. }
  658. // FindReaderSubmatchIndex returns a slice holding the index pairs
  659. // identifying the leftmost match of the regular expression of text read by
  660. // the RuneReader, and the matches, if any, of its subexpressions, as defined
  661. // by the 'Submatch' and 'Index' descriptions in the package comment. A
  662. // return value of nil indicates no match.
  663. func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int {
  664. return re.pad(re.doExecute(newInputReader(r), 0, re.prog.NumCap))
  665. }
  666. const startSize = 10 // The size at which to start a slice in the 'All' routines.
  667. // FindAll is the 'All' version of Find; it returns a slice of all successive
  668. // matches of the expression, as defined by the 'All' description in the
  669. // package comment.
  670. // A return value of nil indicates no match.
  671. func (re *Regexp) FindAll(b []byte, n int) [][]byte {
  672. if n < 0 {
  673. n = len(b) + 1
  674. }
  675. result := make([][]byte, 0, startSize)
  676. re.allMatches("", b, n, func(match []int) {
  677. result = append(result, b[match[0]:match[1]])
  678. })
  679. if len(result) == 0 {
  680. return nil
  681. }
  682. return result
  683. }
  684. // FindAllIndex is the 'All' version of FindIndex; it returns a slice of all
  685. // successive matches of the expression, as defined by the 'All' description
  686. // in the package comment.
  687. // A return value of nil indicates no match.
  688. func (re *Regexp) FindAllIndex(b []byte, n int) [][]int {
  689. if n < 0 {
  690. n = len(b) + 1
  691. }
  692. result := make([][]int, 0, startSize)
  693. re.allMatches("", b, n, func(match []int) {
  694. result = append(result, match[0:2])
  695. })
  696. if len(result) == 0 {
  697. return nil
  698. }
  699. return result
  700. }
  701. // FindAllString is the 'All' version of FindString; it returns a slice of all
  702. // successive matches of the expression, as defined by the 'All' description
  703. // in the package comment.
  704. // A return value of nil indicates no match.
  705. func (re *Regexp) FindAllString(s string, n int) []string {
  706. if n < 0 {
  707. n = len(s) + 1
  708. }
  709. result := make([]string, 0, startSize)
  710. re.allMatches(s, nil, n, func(match []int) {
  711. result = append(result, s[match[0]:match[1]])
  712. })
  713. if len(result) == 0 {
  714. return nil
  715. }
  716. return result
  717. }
  718. // FindAllStringIndex is the 'All' version of FindStringIndex; it returns a
  719. // slice of all successive matches of the expression, as defined by the 'All'
  720. // description in the package comment.
  721. // A return value of nil indicates no match.
  722. func (re *Regexp) FindAllStringIndex(s string, n int) [][]int {
  723. if n < 0 {
  724. n = len(s) + 1
  725. }
  726. result := make([][]int, 0, startSize)
  727. re.allMatches(s, nil, n, func(match []int) {
  728. result = append(result, match[0:2])
  729. })
  730. if len(result) == 0 {
  731. return nil
  732. }
  733. return result
  734. }
  735. // FindAllSubmatch is the 'All' version of FindSubmatch; it returns a slice
  736. // of all successive matches of the expression, as defined by the 'All'
  737. // description in the package comment.
  738. // A return value of nil indicates no match.
  739. func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte {
  740. if n < 0 {
  741. n = len(b) + 1
  742. }
  743. result := make([][][]byte, 0, startSize)
  744. re.allMatches("", b, n, func(match []int) {
  745. slice := make([][]byte, len(match)/2)
  746. for j := range slice {
  747. if match[2*j] >= 0 {
  748. slice[j] = b[match[2*j]:match[2*j+1]]
  749. }
  750. }
  751. result = append(result, slice)
  752. })
  753. if len(result) == 0 {
  754. return nil
  755. }
  756. return result
  757. }
  758. // FindAllSubmatchIndex is the 'All' version of FindSubmatchIndex; it returns
  759. // a slice of all successive matches of the expression, as defined by the
  760. // 'All' description in the package comment.
  761. // A return value of nil indicates no match.
  762. func (re *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int {
  763. if n < 0 {
  764. n = len(b) + 1
  765. }
  766. result := make([][]int, 0, startSize)
  767. re.allMatches("", b, n, func(match []int) {
  768. result = append(result, match)
  769. })
  770. if len(result) == 0 {
  771. return nil
  772. }
  773. return result
  774. }
  775. // FindAllStringSubmatch is the 'All' version of FindStringSubmatch; it
  776. // returns a slice of all successive matches of the expression, as defined by
  777. // the 'All' description in the package comment.
  778. // A return value of nil indicates no match.
  779. func (re *Regexp) FindAllStringSubmatch(s string, n int) [][]string {
  780. if n < 0 {
  781. n = len(s) + 1
  782. }
  783. result := make([][]string, 0, startSize)
  784. re.allMatches(s, nil, n, func(match []int) {
  785. slice := make([]string, len(match)/2)
  786. for j := range slice {
  787. if match[2*j] >= 0 {
  788. slice[j] = s[match[2*j]:match[2*j+1]]
  789. }
  790. }
  791. result = append(result, slice)
  792. })
  793. if len(result) == 0 {
  794. return nil
  795. }
  796. return result
  797. }
  798. // FindAllStringSubmatchIndex is the 'All' version of
  799. // FindStringSubmatchIndex; it returns a slice of all successive matches of
  800. // the expression, as defined by the 'All' description in the package
  801. // comment.
  802. // A return value of nil indicates no match.
  803. func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int {
  804. if n < 0 {
  805. n = len(s) + 1
  806. }
  807. result := make([][]int, 0, startSize)
  808. re.allMatches(s, nil, n, func(match []int) {
  809. result = append(result, match)
  810. })
  811. if len(result) == 0 {
  812. return nil
  813. }
  814. return result
  815. }