/Tools/scripts/pindent.py

http://unladen-swallow.googlecode.com/ · Python · 542 lines · 337 code · 41 blank · 164 comment · 86 complexity · 8cc642ad5770ffa16f7d4259aecd17dd MD5 · raw file

  1. #! /usr/bin/env python
  2. # This file contains a class and a main program that perform three
  3. # related (though complementary) formatting operations on Python
  4. # programs. When called as "pindent -c", it takes a valid Python
  5. # program as input and outputs a version augmented with block-closing
  6. # comments. When called as "pindent -d", it assumes its input is a
  7. # Python program with block-closing comments and outputs a commentless
  8. # version. When called as "pindent -r" it assumes its input is a
  9. # Python program with block-closing comments but with its indentation
  10. # messed up, and outputs a properly indented version.
  11. # A "block-closing comment" is a comment of the form '# end <keyword>'
  12. # where <keyword> is the keyword that opened the block. If the
  13. # opening keyword is 'def' or 'class', the function or class name may
  14. # be repeated in the block-closing comment as well. Here is an
  15. # example of a program fully augmented with block-closing comments:
  16. # def foobar(a, b):
  17. # if a == b:
  18. # a = a+1
  19. # elif a < b:
  20. # b = b-1
  21. # if b > a: a = a-1
  22. # # end if
  23. # else:
  24. # print 'oops!'
  25. # # end if
  26. # # end def foobar
  27. # Note that only the last part of an if...elif...else... block needs a
  28. # block-closing comment; the same is true for other compound
  29. # statements (e.g. try...except). Also note that "short-form" blocks
  30. # like the second 'if' in the example must be closed as well;
  31. # otherwise the 'else' in the example would be ambiguous (remember
  32. # that indentation is not significant when interpreting block-closing
  33. # comments).
  34. # The operations are idempotent (i.e. applied to their own output
  35. # they yield an identical result). Running first "pindent -c" and
  36. # then "pindent -r" on a valid Python program produces a program that
  37. # is semantically identical to the input (though its indentation may
  38. # be different). Running "pindent -d" on that output produces a
  39. # program that only differs from the original in indentation.
  40. # Other options:
  41. # -s stepsize: set the indentation step size (default 8)
  42. # -t tabsize : set the number of spaces a tab character is worth (default 8)
  43. # -e : expand TABs into spaces
  44. # file ... : input file(s) (default standard input)
  45. # The results always go to standard output
  46. # Caveats:
  47. # - comments ending in a backslash will be mistaken for continued lines
  48. # - continuations using backslash are always left unchanged
  49. # - continuations inside parentheses are not extra indented by -r
  50. # but must be indented for -c to work correctly (this breaks
  51. # idempotency!)
  52. # - continued lines inside triple-quoted strings are totally garbled
  53. # Secret feature:
  54. # - On input, a block may also be closed with an "end statement" --
  55. # this is a block-closing comment without the '#' sign.
  56. # Possible improvements:
  57. # - check syntax based on transitions in 'next' table
  58. # - better error reporting
  59. # - better error recovery
  60. # - check identifier after class/def
  61. # The following wishes need a more complete tokenization of the source:
  62. # - Don't get fooled by comments ending in backslash
  63. # - reindent continuation lines indicated by backslash
  64. # - handle continuation lines inside parentheses/braces/brackets
  65. # - handle triple quoted strings spanning lines
  66. # - realign comments
  67. # - optionally do much more thorough reformatting, a la C indent
  68. # Defaults
  69. STEPSIZE = 8
  70. TABSIZE = 8
  71. EXPANDTABS = 0
  72. import re
  73. import sys
  74. next = {}
  75. next['if'] = next['elif'] = 'elif', 'else', 'end'
  76. next['while'] = next['for'] = 'else', 'end'
  77. next['try'] = 'except', 'finally'
  78. next['except'] = 'except', 'else', 'end'
  79. next['else'] = next['finally'] = next['def'] = next['class'] = 'end'
  80. next['end'] = ()
  81. start = 'if', 'while', 'for', 'try', 'def', 'class'
  82. class PythonIndenter:
  83. def __init__(self, fpi = sys.stdin, fpo = sys.stdout,
  84. indentsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  85. self.fpi = fpi
  86. self.fpo = fpo
  87. self.indentsize = indentsize
  88. self.tabsize = tabsize
  89. self.lineno = 0
  90. self.expandtabs = expandtabs
  91. self._write = fpo.write
  92. self.kwprog = re.compile(
  93. r'^\s*(?P<kw>[a-z]+)'
  94. r'(\s+(?P<id>[a-zA-Z_]\w*))?'
  95. r'[^\w]')
  96. self.endprog = re.compile(
  97. r'^\s*#?\s*end\s+(?P<kw>[a-z]+)'
  98. r'(\s+(?P<id>[a-zA-Z_]\w*))?'
  99. r'[^\w]')
  100. self.wsprog = re.compile(r'^[ \t]*')
  101. # end def __init__
  102. def write(self, line):
  103. if self.expandtabs:
  104. self._write(line.expandtabs(self.tabsize))
  105. else:
  106. self._write(line)
  107. # end if
  108. # end def write
  109. def readline(self):
  110. line = self.fpi.readline()
  111. if line: self.lineno = self.lineno + 1
  112. # end if
  113. return line
  114. # end def readline
  115. def error(self, fmt, *args):
  116. if args: fmt = fmt % args
  117. # end if
  118. sys.stderr.write('Error at line %d: %s\n' % (self.lineno, fmt))
  119. self.write('### %s ###\n' % fmt)
  120. # end def error
  121. def getline(self):
  122. line = self.readline()
  123. while line[-2:] == '\\\n':
  124. line2 = self.readline()
  125. if not line2: break
  126. # end if
  127. line = line + line2
  128. # end while
  129. return line
  130. # end def getline
  131. def putline(self, line, indent = None):
  132. if indent is None:
  133. self.write(line)
  134. return
  135. # end if
  136. tabs, spaces = divmod(indent*self.indentsize, self.tabsize)
  137. i = 0
  138. m = self.wsprog.match(line)
  139. if m: i = m.end()
  140. # end if
  141. self.write('\t'*tabs + ' '*spaces + line[i:])
  142. # end def putline
  143. def reformat(self):
  144. stack = []
  145. while 1:
  146. line = self.getline()
  147. if not line: break # EOF
  148. # end if
  149. m = self.endprog.match(line)
  150. if m:
  151. kw = 'end'
  152. kw2 = m.group('kw')
  153. if not stack:
  154. self.error('unexpected end')
  155. elif stack[-1][0] != kw2:
  156. self.error('unmatched end')
  157. # end if
  158. del stack[-1:]
  159. self.putline(line, len(stack))
  160. continue
  161. # end if
  162. m = self.kwprog.match(line)
  163. if m:
  164. kw = m.group('kw')
  165. if kw in start:
  166. self.putline(line, len(stack))
  167. stack.append((kw, kw))
  168. continue
  169. # end if
  170. if next.has_key(kw) and stack:
  171. self.putline(line, len(stack)-1)
  172. kwa, kwb = stack[-1]
  173. stack[-1] = kwa, kw
  174. continue
  175. # end if
  176. # end if
  177. self.putline(line, len(stack))
  178. # end while
  179. if stack:
  180. self.error('unterminated keywords')
  181. for kwa, kwb in stack:
  182. self.write('\t%s\n' % kwa)
  183. # end for
  184. # end if
  185. # end def reformat
  186. def delete(self):
  187. begin_counter = 0
  188. end_counter = 0
  189. while 1:
  190. line = self.getline()
  191. if not line: break # EOF
  192. # end if
  193. m = self.endprog.match(line)
  194. if m:
  195. end_counter = end_counter + 1
  196. continue
  197. # end if
  198. m = self.kwprog.match(line)
  199. if m:
  200. kw = m.group('kw')
  201. if kw in start:
  202. begin_counter = begin_counter + 1
  203. # end if
  204. # end if
  205. self.putline(line)
  206. # end while
  207. if begin_counter - end_counter < 0:
  208. sys.stderr.write('Warning: input contained more end tags than expected\n')
  209. elif begin_counter - end_counter > 0:
  210. sys.stderr.write('Warning: input contained less end tags than expected\n')
  211. # end if
  212. # end def delete
  213. def complete(self):
  214. self.indentsize = 1
  215. stack = []
  216. todo = []
  217. thisid = ''
  218. current, firstkw, lastkw, topid = 0, '', '', ''
  219. while 1:
  220. line = self.getline()
  221. i = 0
  222. m = self.wsprog.match(line)
  223. if m: i = m.end()
  224. # end if
  225. m = self.endprog.match(line)
  226. if m:
  227. thiskw = 'end'
  228. endkw = m.group('kw')
  229. thisid = m.group('id')
  230. else:
  231. m = self.kwprog.match(line)
  232. if m:
  233. thiskw = m.group('kw')
  234. if not next.has_key(thiskw):
  235. thiskw = ''
  236. # end if
  237. if thiskw in ('def', 'class'):
  238. thisid = m.group('id')
  239. else:
  240. thisid = ''
  241. # end if
  242. elif line[i:i+1] in ('\n', '#'):
  243. todo.append(line)
  244. continue
  245. else:
  246. thiskw = ''
  247. # end if
  248. # end if
  249. indent = len(line[:i].expandtabs(self.tabsize))
  250. while indent < current:
  251. if firstkw:
  252. if topid:
  253. s = '# end %s %s\n' % (
  254. firstkw, topid)
  255. else:
  256. s = '# end %s\n' % firstkw
  257. # end if
  258. self.putline(s, current)
  259. firstkw = lastkw = ''
  260. # end if
  261. current, firstkw, lastkw, topid = stack[-1]
  262. del stack[-1]
  263. # end while
  264. if indent == current and firstkw:
  265. if thiskw == 'end':
  266. if endkw != firstkw:
  267. self.error('mismatched end')
  268. # end if
  269. firstkw = lastkw = ''
  270. elif not thiskw or thiskw in start:
  271. if topid:
  272. s = '# end %s %s\n' % (
  273. firstkw, topid)
  274. else:
  275. s = '# end %s\n' % firstkw
  276. # end if
  277. self.putline(s, current)
  278. firstkw = lastkw = topid = ''
  279. # end if
  280. # end if
  281. if indent > current:
  282. stack.append((current, firstkw, lastkw, topid))
  283. if thiskw and thiskw not in start:
  284. # error
  285. thiskw = ''
  286. # end if
  287. current, firstkw, lastkw, topid = \
  288. indent, thiskw, thiskw, thisid
  289. # end if
  290. if thiskw:
  291. if thiskw in start:
  292. firstkw = lastkw = thiskw
  293. topid = thisid
  294. else:
  295. lastkw = thiskw
  296. # end if
  297. # end if
  298. for l in todo: self.write(l)
  299. # end for
  300. todo = []
  301. if not line: break
  302. # end if
  303. self.write(line)
  304. # end while
  305. # end def complete
  306. # end class PythonIndenter
  307. # Simplified user interface
  308. # - xxx_filter(input, output): read and write file objects
  309. # - xxx_string(s): take and return string object
  310. # - xxx_file(filename): process file in place, return true iff changed
  311. def complete_filter(input = sys.stdin, output = sys.stdout,
  312. stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  313. pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
  314. pi.complete()
  315. # end def complete_filter
  316. def delete_filter(input= sys.stdin, output = sys.stdout,
  317. stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  318. pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
  319. pi.delete()
  320. # end def delete_filter
  321. def reformat_filter(input = sys.stdin, output = sys.stdout,
  322. stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  323. pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
  324. pi.reformat()
  325. # end def reformat_filter
  326. class StringReader:
  327. def __init__(self, buf):
  328. self.buf = buf
  329. self.pos = 0
  330. self.len = len(self.buf)
  331. # end def __init__
  332. def read(self, n = 0):
  333. if n <= 0:
  334. n = self.len - self.pos
  335. else:
  336. n = min(n, self.len - self.pos)
  337. # end if
  338. r = self.buf[self.pos : self.pos + n]
  339. self.pos = self.pos + n
  340. return r
  341. # end def read
  342. def readline(self):
  343. i = self.buf.find('\n', self.pos)
  344. return self.read(i + 1 - self.pos)
  345. # end def readline
  346. def readlines(self):
  347. lines = []
  348. line = self.readline()
  349. while line:
  350. lines.append(line)
  351. line = self.readline()
  352. # end while
  353. return lines
  354. # end def readlines
  355. # seek/tell etc. are left as an exercise for the reader
  356. # end class StringReader
  357. class StringWriter:
  358. def __init__(self):
  359. self.buf = ''
  360. # end def __init__
  361. def write(self, s):
  362. self.buf = self.buf + s
  363. # end def write
  364. def getvalue(self):
  365. return self.buf
  366. # end def getvalue
  367. # end class StringWriter
  368. def complete_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  369. input = StringReader(source)
  370. output = StringWriter()
  371. pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
  372. pi.complete()
  373. return output.getvalue()
  374. # end def complete_string
  375. def delete_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  376. input = StringReader(source)
  377. output = StringWriter()
  378. pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
  379. pi.delete()
  380. return output.getvalue()
  381. # end def delete_string
  382. def reformat_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  383. input = StringReader(source)
  384. output = StringWriter()
  385. pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
  386. pi.reformat()
  387. return output.getvalue()
  388. # end def reformat_string
  389. def complete_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  390. source = open(filename, 'r').read()
  391. result = complete_string(source, stepsize, tabsize, expandtabs)
  392. if source == result: return 0
  393. # end if
  394. import os
  395. try: os.rename(filename, filename + '~')
  396. except os.error: pass
  397. # end try
  398. f = open(filename, 'w')
  399. f.write(result)
  400. f.close()
  401. return 1
  402. # end def complete_file
  403. def delete_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  404. source = open(filename, 'r').read()
  405. result = delete_string(source, stepsize, tabsize, expandtabs)
  406. if source == result: return 0
  407. # end if
  408. import os
  409. try: os.rename(filename, filename + '~')
  410. except os.error: pass
  411. # end try
  412. f = open(filename, 'w')
  413. f.write(result)
  414. f.close()
  415. return 1
  416. # end def delete_file
  417. def reformat_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  418. source = open(filename, 'r').read()
  419. result = reformat_string(source, stepsize, tabsize, expandtabs)
  420. if source == result: return 0
  421. # end if
  422. import os
  423. try: os.rename(filename, filename + '~')
  424. except os.error: pass
  425. # end try
  426. f = open(filename, 'w')
  427. f.write(result)
  428. f.close()
  429. return 1
  430. # end def reformat_file
  431. # Test program when called as a script
  432. usage = """
  433. usage: pindent (-c|-d|-r) [-s stepsize] [-t tabsize] [-e] [file] ...
  434. -c : complete a correctly indented program (add #end directives)
  435. -d : delete #end directives
  436. -r : reformat a completed program (use #end directives)
  437. -s stepsize: indentation step (default %(STEPSIZE)d)
  438. -t tabsize : the worth in spaces of a tab (default %(TABSIZE)d)
  439. -e : expand TABs into spaces (defailt OFF)
  440. [file] ... : files are changed in place, with backups in file~
  441. If no files are specified or a single - is given,
  442. the program acts as a filter (reads stdin, writes stdout).
  443. """ % vars()
  444. def error_both(op1, op2):
  445. sys.stderr.write('Error: You can not specify both '+op1+' and -'+op2[0]+' at the same time\n')
  446. sys.stderr.write(usage)
  447. sys.exit(2)
  448. # end def error_both
  449. def test():
  450. import getopt
  451. try:
  452. opts, args = getopt.getopt(sys.argv[1:], 'cdrs:t:e')
  453. except getopt.error, msg:
  454. sys.stderr.write('Error: %s\n' % msg)
  455. sys.stderr.write(usage)
  456. sys.exit(2)
  457. # end try
  458. action = None
  459. stepsize = STEPSIZE
  460. tabsize = TABSIZE
  461. expandtabs = EXPANDTABS
  462. for o, a in opts:
  463. if o == '-c':
  464. if action: error_both(o, action)
  465. # end if
  466. action = 'complete'
  467. elif o == '-d':
  468. if action: error_both(o, action)
  469. # end if
  470. action = 'delete'
  471. elif o == '-r':
  472. if action: error_both(o, action)
  473. # end if
  474. action = 'reformat'
  475. elif o == '-s':
  476. stepsize = int(a)
  477. elif o == '-t':
  478. tabsize = int(a)
  479. elif o == '-e':
  480. expandtabs = 1
  481. # end if
  482. # end for
  483. if not action:
  484. sys.stderr.write(
  485. 'You must specify -c(omplete), -d(elete) or -r(eformat)\n')
  486. sys.stderr.write(usage)
  487. sys.exit(2)
  488. # end if
  489. if not args or args == ['-']:
  490. action = eval(action + '_filter')
  491. action(sys.stdin, sys.stdout, stepsize, tabsize, expandtabs)
  492. else:
  493. action = eval(action + '_file')
  494. for filename in args:
  495. action(filename, stepsize, tabsize, expandtabs)
  496. # end for
  497. # end if
  498. # end def test
  499. if __name__ == '__main__':
  500. test()
  501. # end if