PageRenderTime 14ms CodeModel.GetById 1ms app.highlight 10ms RepoModel.GetById 1ms app.codeStats 0ms

/sre2/simple.go

https://code.google.com/p/sre2/
Go | 142 lines | 104 code | 19 blank | 19 comment | 20 complexity | 30ec3bae1ac616103252f8b71a0d145c MD5 | raw file
  1package sre2
  2
  3func (r *sregexp) Match(src string) bool {
  4	success, _ := r.run(src, false)
  5	return success
  6}
  7
  8func (r *sregexp) MatchIndex(src string) []int {
  9	_, capture := r.run(src, true)
 10	return capture
 11}
 12
 13func (r *sregexp) run(src string, submatch bool) (success bool, capture []int) {
 14	curr := makeStateList(len(r.prog))
 15	next := makeStateList(len(r.prog))
 16	parser := NewSafeReader(src)
 17
 18	return r._run(curr, next, &parser, src, submatch)
 19}
 20
 21
 22func (r *sregexp) _run(curr *stateList, next *stateList, parser *SafeReader, src string, submatch bool) (success bool, capture []int) {
 23	// always start with state zero
 24	curr.addstate(parser, r.prog[r.start], submatch, nil)
 25
 26	for parser.nextCh() != -1 {
 27		ch := parser.curr()
 28		if len(curr.states) == 0 {
 29			return false, nil // no more possible states, short-circuit failure
 30		}
 31
 32		// move along rune paths
 33		for _, st := range curr.states {
 34			i := r.prog[st.idx]
 35			if i.match(ch) {
 36				next.addstate(parser, i.out, submatch, st.capture)
 37			}
 38		}
 39		curr, next = next, curr
 40		next.clear() // clear next so it can be re-used
 41	}
 42
 43	// search for success state
 44	for _, st := range curr.states {
 45		if r.prog[st.idx].mode == iMatch {
 46			return true, st.capture.list(r.caps)
 47		}
 48	}
 49	return false, nil
 50}
 51
 52// stateList is used by regexp.run() to efficiently maintain an ordered list of
 53// current/next regexp integer states.
 54type stateList struct {
 55	sparse []int
 56	states []state
 57}
 58
 59// state represents a state index and captureInfo pair.
 60type state struct {
 61	idx     int
 62	capture *captureInfo
 63}
 64
 65// makeStateList builds a new ordered bitset for use in the regexp.
 66func makeStateList(states int) *stateList {
 67	return &stateList{make([]int, states), make([]state, 0, states)}
 68}
 69
 70// addstate descends through split/alt states and places them all in the
 71// given stateList.
 72func (o *stateList) addstate(p *SafeReader, st *instr, submatch bool, capture *captureInfo) {
 73	switch st.mode {
 74	case iSplit:
 75		o.addstate(p, st.out, submatch, capture)
 76		o.addstate(p, st.out1, submatch, capture)
 77	case iIndexCap:
 78		if submatch {
 79			capture = capture.push(p.npos(), st.cid)
 80		}
 81		o.addstate(p, st.out, submatch, capture)
 82	case iBoundaryCase:
 83		if st.matchBoundaryMode(p.curr(), p.peek()) {
 84			o.addstate(p, st.out, submatch, capture)
 85		}
 86	case iRuneClass, iMatch:
 87		o.put(st.idx, capture)
 88	default:
 89		panic("unexpected instr")
 90	}
 91}
 92
 93// put places the given state into the stateList. Returns true if the state was
 94// previously set, and false if it was not.
 95func (o *stateList) put(v int, capture *captureInfo) bool {
 96	pos := len(o.states)
 97	if o.sparse[v] < pos && o.states[o.sparse[v]].idx == v {
 98		return true // already exists
 99	}
100
101	o.states = o.states[:pos+1]
102	o.sparse[v] = pos
103	o.states[pos].idx = v
104	o.states[pos].capture = capture
105	return false
106}
107
108// clear resets the stateList to be re-used.
109func (o *stateList) clear() {
110	o.states = o.states[0:0]
111}
112
// captureInfo is one node of the submatch record for a run. Nodes form a
// singly linked list, newest first, so that branches of the simulation can
// share their common history; the price is extra garbage for the collector.
type captureInfo struct {
	c    int          // capture index
	pos  int          // position in string
	prev *captureInfo // older node in the list, or nil at the end
}

// push returns a new head node recording position pos for capture index c,
// linked in front of info. The receiver is not modified and may be nil.
func (info *captureInfo) push(pos int, c int) *captureInfo {
	// TODO: Pruning older entries for the same capture index while walking
	// back could discard information that other branches still rely on.
	return &captureInfo{c: c, pos: pos, prev: info}
}

// list flattens the chain into a slice of length 2*size. Every slot starts
// at -1; walking from the newest node to the oldest, a node fills the slot at
// its capture index only while that slot still holds -1, so the most recent
// value for each index wins. The receiver may be nil, in which case every
// slot stays -1.
func (info *captureInfo) list(size int) (ret []int) {
	ret = make([]int, size<<1)
	for i := range ret {
		ret[i] = -1
	}
	for node := info; node != nil; node = node.prev {
		if ret[node.c] != -1 {
			continue // a newer node already supplied this index
		}
		ret[node.c] = node.pos
	}
	return ret
}