PageRenderTime 46ms CodeModel.GetById 15ms RepoModel.GetById 1ms app.codeStats 0ms

/pyjade/lexer.py

https://github.com/weapp/pyjade
Python | 419 lines | 416 code | 2 blank | 1 comment | 1 complexity | f1226b5c0cf63045514e33652b99ccaa MD5 | raw file
Possible License(s): MIT
  1. import re
  2. from collections import deque
  3. class Token:
  4. def __init__(self, **kwds):
  5. self.buffer = None
  6. self.__dict__.update(kwds)
  7. def __str__(self):
  8. return self.__dict__.__str__()
  9. def regexec (regex, input):
  10. matches = regex.match(input)
  11. if matches:
  12. return (input[matches.start():matches.end()],)+matches.groups()
  13. return None
  14. class Lexer(object):
  15. RE_INPUT = re.compile(r'\r\n|\r')
  16. RE_COMMENT = re.compile(r'^ *\/\/(-)?([^\n]*)')
  17. RE_TAG = re.compile(r'^(\w[-:\w]*)')
  18. RE_FILTER = re.compile(r'^:(\w+)')
  19. RE_DOCTYPE = re.compile(r'^(?:!!!|doctype) *([^\n]+)?')
  20. RE_ID = re.compile(r'^#([\w-]+)')
  21. RE_CLASS = re.compile(r'^\.([\w-]+)')
  22. RE_TEXT = re.compile(r'^(?:\| ?)?([^\n]+)')
  23. RE_EXTENDS = re.compile(r'^extends? +([^\n]+)')
  24. RE_PREPEND = re.compile(r'^prepend +([^\n]+)')
  25. RE_APPEND = re.compile(r'^append +([^\n]+)')
  26. RE_BLOCK = re.compile(r'^block +(?:(prepend|append) +)?([^\n]+)')
  27. RE_YIELD = re.compile(r'^yield *')
  28. RE_INCLUDE = re.compile(r'^include +([^\n]+)')
  29. RE_ASSIGNMENT = re.compile(r'^(\w+) += *([^;\n]+)( *;? *)')
  30. RE_MIXIN = re.compile(r'^mixin +([-\w]+)(?: *\((.*)\))?')
  31. RE_CONDITIONAL = re.compile(r'^(?:- *)?(if|unless|else if|elif|else)\b([^\n]*)')
  32. # RE_WHILE = re.compile(r'^while +([^\n]+)')
  33. RE_EACH = re.compile(r'^(?:- *)?(?:each|for) +([\w, ]+) +in +([^\n]+)')
  34. RE_CODE = re.compile(r'^(!?=|-)([^\n]+)')
  35. RE_ATTR_INTERPOLATE = re.compile(r'#\{([^}]+)\}')
  36. RE_ATTR_PARSE = re.compile(r'''^['"]|['"]$''')
  37. RE_INDENT_TABS = re.compile(r'^\n(\t*) *')
  38. RE_INDENT_SPACES = re.compile(r'^\n( *)')
  39. RE_COLON = re.compile(r'^: *')
  40. # RE_ = re.compile(r'')
  41. def __init__(self,str,**options):
  42. self.options = options
  43. self.input = self.RE_INPUT.sub('\n',str)
  44. self.colons = self.options.get('colons',False)
  45. self.deferredTokens = deque()
  46. self.lastIndents = 0
  47. self.lineno = 1
  48. self.stash = deque()
  49. self.indentStack = deque()
  50. self.indentRe = None
  51. self.pipeless = False
  52. def tok(self,type,val=None):
  53. return Token(type=type,line=self.lineno,val=val)
  54. def consume(self,len):
  55. self.input = self.input[len:]
  56. def scan(self,regexp,type):
  57. captures = regexec(regexp,self.input)
  58. # print regexp,type, self.input, captures
  59. if captures:
  60. # print captures
  61. self.consume(len(captures[0]))
  62. # print 'a',self.input
  63. if len(captures)==1: return self.tok(type,None)
  64. return self.tok(type,captures[1])
  65. def defer(self,tok):
  66. self.deferredTokens.append(tok)
  67. def lookahead(self,n):
  68. # print self.stash
  69. fetch = n-len(self.stash)
  70. while True:
  71. fetch -=1
  72. if not fetch>=0: break
  73. self.stash.append(self.next())
  74. return self.stash[n-1]
  75. def indexOfDelimiters(self,start,end):
  76. str,nstart,nend,pos = self.input,0,0,0
  77. for i,s in enumerate(str):
  78. if start == s: nstart +=1
  79. elif end == s:
  80. nend +=1
  81. if nend==nstart:
  82. pos = i
  83. break
  84. return pos
  85. def stashed (self):
  86. # print self.stash
  87. return len(self.stash) and self.stash.popleft()
  88. def deferred (self):
  89. return len(self.deferredTokens) and self.deferredTokens.popleft()
  90. def eos (self):
  91. # print 'eos',bool(self.input)
  92. if self.input: return
  93. if self.indentStack:
  94. self.indentStack.popleft()
  95. return self.tok('outdent')
  96. else:
  97. return self.tok('eos')
  98. def comment(self):
  99. captures = regexec(self.RE_COMMENT,self.input)
  100. if captures:
  101. self.consume(len(captures[0]))
  102. tok = self.tok('comment',captures[2])
  103. tok.buffer = '-'!=captures[1]
  104. return tok
  105. def tag(self):
  106. captures = regexec(self.RE_TAG,self.input)
  107. # print self.input,captures,re.match('^(\w[-:\w]*)',self.input)
  108. if captures:
  109. self.consume(len(captures[0]))
  110. name = captures[1]
  111. if name.endswith(':'):
  112. name = name[:-1]
  113. tok = self.tok('tag',name)
  114. self.defer(self.tok(':'))
  115. while self.input[0]== ' ': self.input = self.input[1:]
  116. else:
  117. tok = self.tok('tag',name)
  118. return tok
  119. def filter(self):
  120. return self.scan(self.RE_FILTER, 'filter')
  121. def doctype(self):
  122. # print self.scan(self.RE_DOCTYPE, 'doctype')
  123. return self.scan(self.RE_DOCTYPE, 'doctype')
  124. def id(self):
  125. return self.scan(self.RE_ID, 'id')
  126. def className(self):
  127. return self.scan(self.RE_CLASS, 'class')
  128. def text(self):
  129. return self.scan(self.RE_TEXT, 'text')
  130. def extends(self):
  131. return self.scan(self.RE_EXTENDS, 'extends')
  132. def prepend(self):
  133. captures = regexec(self.RE_PREPEND,self.input)
  134. if captures:
  135. self.consume(len(captures[0]))
  136. mode,name = 'prepend',captures[1]
  137. tok = self.tok('block',name)
  138. tok.mode = mode
  139. return tok
  140. def append(self):
  141. captures = regexec(self.RE_APPEND,self.input)
  142. if captures:
  143. self.consume(len(captures[0]))
  144. mode,name = 'append',captures[1]
  145. tok = self.tok('block',name)
  146. tok.mode = mode
  147. return tok
  148. def block(self):
  149. captures = regexec(self.RE_BLOCK,self.input)
  150. if captures:
  151. self.consume(len(captures[0]))
  152. mode = captures[1] or 'replace'
  153. name = captures[2]
  154. tok = self.tok('block',name)
  155. tok.mode = mode
  156. return tok
  157. def _yield(self):
  158. return self.scan(self.RE_YIELD, 'yield')
  159. def include(self):
  160. return self.scan(self.RE_INCLUDE, 'include')
  161. def assignment(self):
  162. captures = regexec(self.RE_ASSIGNMENT,self.input)
  163. if captures:
  164. self.consume(len(captures[0]))
  165. name,val = captures[1:3]
  166. tok = self.tok('assignment')
  167. tok.name = name
  168. tok.val = val
  169. return tok
  170. def mixin(self):
  171. captures = regexec(self.RE_MIXIN,self.input)
  172. if captures:
  173. self.consume(len(captures[0]))
  174. tok = self.tok('mixin',captures[1])
  175. tok.args = captures[2]
  176. return tok
  177. def conditional(self):
  178. captures = regexec(self.RE_CONDITIONAL,self.input)
  179. if captures:
  180. self.consume(len(captures[0]))
  181. type,sentence = captures[1:]
  182. tok = self.tok('conditional',type)
  183. tok.sentence = sentence
  184. return tok
  185. # def _while(self):
  186. # captures = regexec(self.RE_WHILE,self.input)
  187. # if captures:
  188. # self.consume(len(captures[0]))
  189. # return self.tok('code','while(%s)'%captures[1])
  190. def each(self):
  191. captures = regexec(self.RE_EACH,self.input)
  192. if captures:
  193. self.consume(len(captures[0]))
  194. tok = self.tok('each',None)
  195. tok.keys = map(lambda x:x.strip(),captures[1].split(','))
  196. tok.code = captures[2]
  197. return tok
  198. def code(self):
  199. captures = regexec(self.RE_CODE,self.input)
  200. if captures:
  201. self.consume(len(captures[0]))
  202. flags, name = captures[1:]
  203. tok = self.tok('code',name)
  204. tok.escape = flags.startswith('=')
  205. #print captures
  206. tok.buffer = '=' in flags
  207. # print tok.buffer
  208. return tok
  209. def attrs(self):
  210. if '(' == self.input[0]:
  211. index = self.indexOfDelimiters('(',')')
  212. str = self.input[1:index]
  213. tok = self.tok('attrs')
  214. l = len(str)
  215. colons = self.colons
  216. states = ['key']
  217. class Namespace:
  218. key = u''
  219. val = u''
  220. quote = u''
  221. literal = False
  222. def reset(self):
  223. self.key = self.val = self.quote = u''
  224. self.literal = False
  225. def __str__(self):
  226. return dict(key=self.key,val=self.val,quote=self.quote,literal=self.literal).__str__()
  227. ns = Namespace()
  228. def state():
  229. return states[-1]
  230. def interpolate(attr):
  231. return self.RE_ATTR_INTERPOLATE.sub(lambda matchobj:'%s+%s.__str__()+%s'%(ns.quote,matchobj.group(1),ns.quote),attr)
  232. self.consume(index+1)
  233. from utils import odict
  234. tok.attrs = odict()
  235. tok.static_attrs = set()
  236. def parse(c):
  237. real = c
  238. if colons and ':'==c: c = '='
  239. if c in (',','\n'):
  240. s = state()
  241. if s in ('expr','array','string','object'):
  242. ns.val += c
  243. else:
  244. states.append('key')
  245. ns.val = ns.val.strip()
  246. ns.key = ns.key.strip()
  247. if not ns.key: return
  248. ns.literal = ns.literal
  249. if not ns.literal:
  250. if '!'==ns.key[-1]:
  251. ns.literal = True
  252. ns.key = ns.key[:-1]
  253. ns.key = ns.key.strip("'\"")
  254. if ns.literal:
  255. tok.static_attrs.add(ns.key)
  256. tok.attrs[ns.key] = True if not ns.val else interpolate(ns.val)
  257. ns.reset()
  258. elif '=' == c:
  259. s = state()
  260. if s == 'key char':
  261. ns.key += real
  262. elif s in ('val','expr','array','string','object'): ns.val+= real
  263. else: states.append('val')
  264. elif '(' == c:
  265. if state() in ('val','expr'): states.append('expr')
  266. ns.val+=c
  267. elif ')' == c:
  268. if state() in ('val','expr'): states.pop()
  269. ns.val+=c
  270. elif '{' == c:
  271. if 'val'==state(): states.append('object')
  272. ns.val+=c
  273. elif '}' == c:
  274. if 'object'==state(): states.pop()
  275. ns.val+=c
  276. elif '[' == c:
  277. if 'val'==state(): states.append('array')
  278. ns.val+=c
  279. elif ']' == c:
  280. if 'array'==state(): states.pop()
  281. ns.val+=c
  282. elif c in ('"',"'"):
  283. s = state()
  284. if 'key'==s: states.append('key char')
  285. elif 'key char'==s: states.pop()
  286. elif 'string'==s:
  287. if c==ns.quote: states.pop()
  288. ns.val +=c
  289. else:
  290. states.append('string')
  291. ns.val +=c
  292. ns.quote = c
  293. elif ''== c: pass
  294. else:
  295. s = state()
  296. if s in ('key','key char'): ns.key += c
  297. else: ns.val += c
  298. for char in str:
  299. parse(char)
  300. parse(',')
  301. return tok
  302. def indent(self):
  303. if self.indentRe:
  304. captures = regexec(self.indentRe,self.input)
  305. else:
  306. regex = self.RE_INDENT_TABS
  307. captures = regexec(regex,self.input)
  308. if captures and not captures[1]:
  309. regex = self.RE_INDENT_SPACES
  310. captures = regexec(regex,self.input)
  311. if captures and captures[1]: self.indentRe = regex
  312. if captures:
  313. indents = len(captures[1])
  314. self.lineno += 1
  315. self.consume(indents+1)
  316. if not self.input: return self.tok('newline')
  317. if self.input[0] in (' ','\t'):
  318. raise Exception('Invalid indentation, you can use tabs or spaces but not both')
  319. if '\n' == self.input[0]: return self.tok('newline')
  320. if self.indentStack and indents< self.indentStack[0]:
  321. while self.indentStack and self.indentStack[0]>indents:
  322. self.stash.append(self.tok('outdent'))
  323. self.indentStack.popleft()
  324. tok = self.stash.pop()
  325. elif indents and (not self.indentStack or indents != self.indentStack[0]):
  326. self.indentStack.appendleft(indents)
  327. tok = self.tok('indent',indents)
  328. else:
  329. tok = self.tok('newline')
  330. return tok
  331. def pipelessText(self):
  332. if self.pipeless:
  333. if '\n' == self.input[0]: return
  334. i = self.input.find('\n')
  335. if -1 == i: i = len(self.input)
  336. str = self.input[:i]
  337. self.consume(len(str))
  338. return self.tok('text',str)
  339. def colon(self):
  340. return self.scan(self.RE_COLON,':')
  341. def advance(self):
  342. return self.stashed() or self.next()
  343. def next(self):
  344. return self.deferred() \
  345. or self.eos() \
  346. or self.pipelessText() \
  347. or self._yield() \
  348. or self.doctype() \
  349. or self.extends() \
  350. or self.append() \
  351. or self.prepend() \
  352. or self.block() \
  353. or self.include() \
  354. or self.mixin() \
  355. or self.conditional() \
  356. or self.each() \
  357. or self.assignment() \
  358. or self.tag() \
  359. or self.filter() \
  360. or self.code() \
  361. or self.id() \
  362. or self.className() \
  363. or self.attrs() \
  364. or self.indent() \
  365. or self.comment() \
  366. or self.colon() \
  367. or self.text()
  368. ##or self._while() \