// /sre2/simple.go
//
// https://code.google.com/p/sre2/ · Go · 142 lines · 104 code · 19 blank · 19 comment · 20 complexity · 30ec3bae1ac616103252f8b71a0d145c MD5 · raw file

package sre2
  2. func (r *sregexp) Match(src string) bool {
  3. success, _ := r.run(src, false)
  4. return success
  5. }
  6. func (r *sregexp) MatchIndex(src string) []int {
  7. _, capture := r.run(src, true)
  8. return capture
  9. }
  10. func (r *sregexp) run(src string, submatch bool) (success bool, capture []int) {
  11. curr := makeStateList(len(r.prog))
  12. next := makeStateList(len(r.prog))
  13. parser := NewSafeReader(src)
  14. return r._run(curr, next, &parser, src, submatch)
  15. }
  16. func (r *sregexp) _run(curr *stateList, next *stateList, parser *SafeReader, src string, submatch bool) (success bool, capture []int) {
  17. // always start with state zero
  18. curr.addstate(parser, r.prog[r.start], submatch, nil)
  19. for parser.nextCh() != -1 {
  20. ch := parser.curr()
  21. if len(curr.states) == 0 {
  22. return false, nil // no more possible states, short-circuit failure
  23. }
  24. // move along rune paths
  25. for _, st := range curr.states {
  26. i := r.prog[st.idx]
  27. if i.match(ch) {
  28. next.addstate(parser, i.out, submatch, st.capture)
  29. }
  30. }
  31. curr, next = next, curr
  32. next.clear() // clear next so it can be re-used
  33. }
  34. // search for success state
  35. for _, st := range curr.states {
  36. if r.prog[st.idx].mode == iMatch {
  37. return true, st.capture.list(r.caps)
  38. }
  39. }
  40. return false, nil
  41. }
  42. // stateList is used by regexp.run() to efficiently maintain an ordered list of
  43. // current/next regexp integer states.
  44. type stateList struct {
  45. sparse []int
  46. states []state
  47. }
  48. // state represents a state index and captureInfo pair.
  49. type state struct {
  50. idx int
  51. capture *captureInfo
  52. }
  53. // makeStateList builds a new ordered bitset for use in the regexp.
  54. func makeStateList(states int) *stateList {
  55. return &stateList{make([]int, states), make([]state, 0, states)}
  56. }
  57. // addstate descends through split/alt states and places them all in the
  58. // given stateList.
  59. func (o *stateList) addstate(p *SafeReader, st *instr, submatch bool, capture *captureInfo) {
  60. switch st.mode {
  61. case iSplit:
  62. o.addstate(p, st.out, submatch, capture)
  63. o.addstate(p, st.out1, submatch, capture)
  64. case iIndexCap:
  65. if submatch {
  66. capture = capture.push(p.npos(), st.cid)
  67. }
  68. o.addstate(p, st.out, submatch, capture)
  69. case iBoundaryCase:
  70. if st.matchBoundaryMode(p.curr(), p.peek()) {
  71. o.addstate(p, st.out, submatch, capture)
  72. }
  73. case iRuneClass, iMatch:
  74. o.put(st.idx, capture)
  75. default:
  76. panic("unexpected instr")
  77. }
  78. }
  79. // put places the given state into the stateList. Returns true if the state was
  80. // previously set, and false if it was not.
  81. func (o *stateList) put(v int, capture *captureInfo) bool {
  82. pos := len(o.states)
  83. if o.sparse[v] < pos && o.states[o.sparse[v]].idx == v {
  84. return true // already exists
  85. }
  86. o.states = o.states[:pos+1]
  87. o.sparse[v] = pos
  88. o.states[pos].idx = v
  89. o.states[pos].capture = capture
  90. return false
  91. }
  92. // clear resets the stateList to be re-used.
  93. func (o *stateList) clear() {
  94. o.states = o.states[0:0]
  95. }
  96. // captureInfo represents the submatch information for a given run. This is represented as a linked
  97. // list so that early states can be shared; however there's more cost in GC.
  98. type captureInfo struct {
  99. c int // capture index
  100. pos int // position in string
  101. prev *captureInfo // previous node in list, or nil
  102. }
  103. // push adds a new head to the existing submatch information, returning it. Note that the receiver
  104. // here may be nil.
  105. func (info *captureInfo) push(pos int, c int) *captureInfo {
  106. // TODO: If we traverse back and remove previous instances of this capture group, then we might
  107. // remove information used by other branches.
  108. return &captureInfo{c, pos, info}
  109. }
  110. // list translates the given submatch state into a concrete []int for use by callers.
  111. func (info *captureInfo) list(size int) (ret []int) {
  112. ret = make([]int, size<<1)
  113. for i := 0; i < len(ret); i++ {
  114. ret[i] = -1
  115. }
  116. for info != nil {
  117. if ret[info.c] == -1 {
  118. ret[info.c] = info.pos
  119. }
  120. info = info.prev
  121. }
  122. return ret
  123. }