PageRenderTime 55ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/Python/system/decompyle.py

https://bitbucket.org/cwalther/moulscript-dlanor
Python | 1500 lines | 1360 code | 43 blank | 97 comment | 45 complexity | a84b2fcc40d9a2f3a2a94074c3cbdebb MD5 | raw file
Possible License(s): AGPL-1.0, GPL-3.0
  1. # Copyright (c) 1999 John Aycock
  2. # Copyright (c) 2000 by hartmut Goebel <hartmut@goebel.noris.de>
  3. #
  4. # Permission is hereby granted, free of charge, to any person obtaining
  5. # a copy of this software and associated documentation files (the
  6. # "Software"), to deal in the Software without restriction, including
  7. # without limitation the rights to use, copy, modify, merge, publish,
  8. # distribute, sublicense, and/or sell copies of the Software, and to
  9. # permit persons to whom the Software is furnished to do so, subject to
  10. # the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be
  13. # included in all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  16. # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  17. # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  18. # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  19. # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  20. # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  21. # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  22. #
  23. # See 'CHANGES' for a list of changes
  24. #
  25. # NB. This is not a masterpiece of software, but became more like a hack.
  26. # Probably a complete rewrite would be sensible. hG/2000-12-27
  27. #
  28. import re, sys, os, types
  29. import dis, imp, marshal
  30. import string
  31. import cStringIO
  32. def _load_file(filename):
  33. """
  34. load a Python source file and compile it to byte-code
  35. _load_module(filename: string): code_object
  36. filename: name of file containing Python source code
  37. (normally a .py)
  38. code_object: code_object compiled from this source code
  39. This function does NOT write any file!
  40. """
  41. fp = open(filename, 'rb')
  42. source = fp.read()+'\n'
  43. try:
  44. co = compile(source, filename, 'exec')
  45. except SyntaxError:
  46. sys.stderr.writelines( ['>>Syntax error in ', filename, '\n'] )
  47. raise
  48. fp.close()
  49. return co
  50. def _load_module(filename):
  51. """
  52. load a module without importing it
  53. _load_module(filename: string): code_object
  54. filename: name of file containing Python byte-code object
  55. (normally a .pyc)
  56. code_object: code_object from this file
  57. """
  58. fp = open(filename, 'rb')
  59. if fp.read(4) != imp.get_magic():
  60. raise ImportError, "Bad magic number in %s" % filename
  61. fp.read(4)
  62. co = marshal.load(fp)
  63. fp.close()
  64. return co
  65. #-- start of (de-)compiler
  66. #
  67. # Scanning
  68. #
  69. class Code:
  70. """Class for representing code-objects.
  71. This is similar to the original code object, but additionally
  72. the diassembled code is stored in the attribute '_tokens'.
  73. """
  74. def __init__(self, co):
  75. for i in dir(co):
  76. exec 'self.%s = co.%s' % (i, i)
  77. self._tokens, self._customize = disassemble(co)
  78. class Token:
  79. """Class representing a byte-code token.
  80. A byte-code token is equivalent to the contents of one line
  81. as output by dis.dis().
  82. """
  83. def __init__(self, type, attr=None, pattr=None, offset=-1):
  84. self.type = intern(type)
  85. self.attr = attr
  86. self.pattr = pattr
  87. self.offset = offset
  88. def __cmp__(self, o):
  89. if isinstance(o, Token):
  90. # both are tokens: compare type and pattr
  91. return cmp(self.type, o.type) \
  92. or cmp(self.pattr, o.pattr)
  93. else:
  94. return cmp(self.type, o)
  95. def __repr__(self): return str(self.type)
  96. def __str__(self):
  97. if self.pattr: pattr = self.pattr
  98. else: pattr = ''
  99. return '%s\t%-17s %s' % (self.offset, self.type, pattr)
  100. def __hash__(self): return hash(self.type)
  101. def __getitem__(self, i): raise IndexError
  102. _JUMP_OPS_ = map(lambda op: dis.opname[op], dis.hasjrel + dis.hasjabs)
  103. def disassemble(co):
  104. """Disassemble a code object, returning a list of Token.
  105. The main part of this procedure is modelled after
  106. dis.diaassemble().
  107. """
  108. rv = []
  109. customize = {}
  110. code = co.co_code
  111. cf = find_jump_targets(code)
  112. n = len(code)
  113. i = 0
  114. while i < n:
  115. offset = i
  116. if cf.has_key(offset):
  117. for j in range(cf[offset]):
  118. rv.append(Token('COME_FROM',
  119. offset="%s_%d" % (offset, j) ))
  120. c = code[i]
  121. op = ord(c)
  122. opname = dis.opname[op]
  123. i = i+1
  124. oparg = None; pattr = None
  125. if op >= dis.HAVE_ARGUMENT:
  126. oparg = ord(code[i]) + ord(code[i+1])*256
  127. i = i+2
  128. if op in dis.hasconst:
  129. const = co.co_consts[oparg]
  130. if type(const) == types.CodeType:
  131. oparg = const
  132. if const.co_name == '<lambda>':
  133. assert opname == 'LOAD_CONST'
  134. opname = 'LOAD_LAMBDA'
  135. # verify uses 'pattr' for
  136. # comparism, since 'attr' now
  137. # hold Code(const) and thus
  138. # can not be used for
  139. # comparism (todo: thinkg
  140. # about changing this)
  141. #pattr = 'code_object @ 0x%x %s->%s' %\
  142. # (id(const), const.co_filename, const.co_name)
  143. pattr = 'code_object ' + const.co_name
  144. else:
  145. pattr = `const`
  146. elif op in dis.hasname:
  147. pattr = co.co_names[oparg]
  148. elif op in dis.hasjrel:
  149. pattr = `i + oparg`
  150. elif op in dis.haslocal:
  151. pattr = co.co_varnames[oparg]
  152. elif op in dis.hascompare:
  153. pattr = dis.cmp_op[oparg]
  154. if opname == 'SET_LINENO':
  155. continue
  156. elif opname in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SLICE',
  157. 'UNPACK_LIST', 'UNPACK_TUPLE',
  158. 'UNPACK_SEQUENCE',
  159. 'MAKE_FUNCTION', 'CALL_FUNCTION',
  160. 'CALL_FUNCTION_VAR', 'CALL_FUNCTION_KW',
  161. 'CALL_FUNCTION_VAR_KW', 'DUP_TOPX',
  162. ):
  163. opname = '%s_%d' % (opname, oparg)
  164. customize[opname] = oparg
  165. rv.append(Token(opname, oparg, pattr, offset))
  166. return rv, customize
  167. def find_jump_targets(code):
  168. """Detect all offsets in a byte code which are jump targets.
  169. Return the list of offsets.
  170. This procedure is modelled after dis.findlables(), but here
  171. for each target the number of jumps are counted.
  172. """
  173. targets = {}
  174. n = len(code)
  175. i = 0
  176. while i < n:
  177. c = code[i]
  178. op = ord(c)
  179. i = i+1
  180. if op >= dis.HAVE_ARGUMENT:
  181. oparg = ord(code[i]) + ord(code[i+1])*256
  182. i = i+2
  183. label = -1
  184. if op in dis.hasjrel:
  185. label = i+oparg
  186. # todo: absolut jumps
  187. #elif op in dis.hasjabs:
  188. # label = oparg
  189. if label >= 0:
  190. targets[label] = targets.get(label, 0) + 1
  191. return targets
  192. #
  193. # Parsing
  194. #
  195. class AST:
  196. def __init__(self, type, kids=None):
  197. self.type = intern(type)
  198. if kids == None: kids = []
  199. self._kids = kids
  200. def append(self, o): self._kids.append(o)
  201. def pop(self): return self._kids.pop()
  202. def __getitem__(self, i): return self._kids[i]
  203. def __setitem__(self, i, val): self._kids[i] = val
  204. def __delitem__(self, i): del self._kids[i]
  205. def __len__(self): return len(self._kids)
  206. def __getslice__(self, low, high): return self._kids[low:high]
  207. def __setslice__(self, low, high, seq): self._kids[low:high] = seq
  208. def __delslice__(self, low, high): del self._kids[low:high]
  209. def __cmp__(self, o):
  210. if isinstance(o, AST):
  211. return cmp(self.type, o.type) \
  212. or cmp(self._kids, o._kids)
  213. else:
  214. return cmp(self.type, o)
  215. def __hash__(self): return hash(self.type)
  216. def __repr__(self):
  217. rv = str(self.type)
  218. for k in self._kids:
  219. rv = rv + '\n' + string.replace(str(k), '\n', '\n ')
  220. return rv
  221. # Some ASTs used for comparing code fragments (like 'return None' at
  222. # the end of functions).
  223. RETURN_LOCALS = AST('stmt',
  224. [ AST('return_stmt',
  225. [ AST('expr', [ Token('LOAD_LOCALS') ]),
  226. Token('RETURN_VALUE')]) ])
  227. RETURN_NONE = AST('stmt',
  228. [ AST('return_stmt',
  229. [ AST('expr', [ Token('LOAD_CONST', pattr='None') ]),
  230. Token('RETURN_VALUE')]) ])
  231. ASSIGN_DOC_STRING = lambda doc_string: \
  232. AST('stmt',
  233. [ AST('assign',
  234. [ AST('expr', [ Token('LOAD_CONST', pattr=`doc_string`) ]),
  235. AST('designator', [ Token('STORE_NAME', pattr='__doc__')])
  236. ])])
  237. BUILD_TUPLE_0 = AST('expr',
  238. [ Token('BUILD_TUPLE_0') ] )
  239. from spark import GenericASTBuilder, GenericASTMatcher
class Parser(GenericASTBuilder):
    # Parser turning the token list produced by disassemble() into a
    # tree of AST nodes.  The grammar lives in the docstrings of the
    # p_* methods below (presumably collected by the spark base class
    # -- verify against spark.py); rules for opcodes with a variable
    # operand count are added at run time by parse().

    def __init__(self):
        # AST and 'code' are handed to the base class; 'code' is also
        # the top-level symbol of the grammar in p_grammar().
        GenericASTBuilder.__init__(self, AST, 'code')
        self.customized = {}

    def cleanup(self):
        """
        Remove recursive references to allow garbage
        collector to collect this object.
        """
        # blank out the values of the rule tables first ...
        for dict in (self.rule2func, self.rules, self.rule2name, self.first):
            for i in dict.keys():
                dict[i] = None
        # ... then every attribute dir() reports for this instance
        for i in dir(self):
            setattr(self, i, None)

    # Report the offending token and terminate via SystemExit.
    def error(self, token):
        # output offset, too
        print "Syntax error at or near `%s' token at offset %s" % \
              (`token`, token.offset)
        raise SystemExit

    # The grammar symbol of a token is its 'type' attribute.
    def typestring(self, token):
        return token.type

    # Rules for function definitions.
    def p_funcdef(self, args):
        '''
        stmt ::= funcdef
        funcdef ::= mkfunc STORE_FAST
        funcdef ::= mkfunc STORE_NAME
        '''

    # new for Python2.0
    #
    # UNPACK_SEQUENCE # number of tuple items
    # EXTENDED_ARG

    # Rules for list comprehensions.
    def p_list_comprehension(self, args):
        '''
        expr ::= list_compr
        list_compr ::= lc_prep lc_for lc_cleanup
        lc_prep ::= BUILD_LIST_0 DUP_TOP LOAD_ATTR STORE_NAME
        lc_prep ::= BUILD_LIST_0 DUP_TOP LOAD_ATTR STORE_FAST
        lc_for ::= expr LOAD_CONST
                    FOR_LOOP designator
                    lc_for JUMP_ABSOLUTE
                    COME_FROM
        lc_for ::= expr LOAD_CONST
                    FOR_LOOP designator
                    lc_if JUMP_ABSOLUTE
                    COME_FROM
        lc_for ::= expr LOAD_CONST
                    FOR_LOOP designator
                    lc_body JUMP_ABSOLUTE
                    COME_FROM
        lc_if ::= expr condjmp lc_body
                    JUMP_FORWARD COME_FROM POP_TOP
                    COME_FROM
        lc_body ::= LOAD_NAME expr CALL_FUNCTION_1 POP_TOP
        lc_body ::= LOAD_FAST expr CALL_FUNCTION_1 POP_TOP
        lc_cleanup ::= DELETE_NAME
        lc_cleanup ::= DELETE_FAST
        '''

    # Rules for augmented assignments (the INPLACE_* opcodes).
    def p_augmented_assign(self, args):
        '''
        stmt ::= augassign1
        stmt ::= augassign2
        augassign1 ::= expr expr inplace_op designator
        augassign1 ::= expr expr inplace_op ROT_THREE STORE_SUBSCR
        augassign1 ::= expr expr inplace_op ROT_TWO STORE_SLICE+0
        augassign1 ::= expr expr inplace_op ROT_THREE STORE_SLICE+1
        augassign1 ::= expr expr inplace_op ROT_THREE STORE_SLICE+2
        augassign1 ::= expr expr inplace_op ROT_FOUR STORE_SLICE+3
        augassign2 ::= expr DUP_TOP LOAD_ATTR expr
                    inplace_op ROT_TWO STORE_ATTR
        inplace_op ::= INPLACE_ADD
        inplace_op ::= INPLACE_SUBTRACT
        inplace_op ::= INPLACE_MULTIPLY
        inplace_op ::= INPLACE_DIVIDE
        inplace_op ::= INPLACE_MODULO
        inplace_op ::= INPLACE_POWER
        inplace_op ::= INPLACE_LSHIFT
        inplace_op ::= INPLACE_RSHIFT
        inplace_op ::= INPLACE_AND
        inplace_op ::= INPLACE_XOR
        inplace_op ::= INPLACE_OR
        '''

    # Rules for plain and chained assignments.
    def p_assign(self, args):
        '''
        stmt ::= assign
        assign ::= expr DUP_TOP designList
        assign ::= expr designator
        '''

    # Rules for the print statement.
    def p_print(self, args):
        '''
        stmt ::= print_stmt
        stmt ::= print_stmt_nl
        stmt ::= print_nl_stmt
        print_stmt ::= expr PRINT_ITEM
        print_nl_stmt ::= PRINT_NEWLINE
        print_stmt_nl ::= print_stmt print_nl_stmt
        '''

    # Rules for the PRINT_ITEM_TO / PRINT_NEWLINE_TO forms of print.
    def p_print_to(self, args):
        '''
        stmt ::= print_to
        stmt ::= print_to_nl
        stmt ::= print_nl_to
        print_to ::= expr print_to_items POP_TOP
        print_to_nl ::= expr print_to_items PRINT_NEWLINE_TO
        print_nl_to ::= expr PRINT_NEWLINE_TO
        print_to_items ::= print_to_items print_to_item
        print_to_items ::= print_to_item
        print_to_item ::= DUP_TOP expr ROT_TWO PRINT_ITEM_TO
        '''
    # expr print_to* POP_TOP
    # expr { print_to* } PRINT_NEWLINE_TO

    # Import rules without a leading LOAD_CONST (the method name
    # suggests byte-code of Python 1.5 -- TODO confirm).
    def p_import15(self, args):
        '''
        stmt ::= importstmt
        stmt ::= importfrom
        importstmt ::= IMPORT_NAME STORE_FAST
        importstmt ::= IMPORT_NAME STORE_NAME
        importfrom ::= IMPORT_NAME importlist POP_TOP
        importlist ::= importlist IMPORT_FROM
        importlist ::= IMPORT_FROM
        '''

    # Import rules starting with LOAD_CONST (the method name suggests
    # byte-code of Python 2.0 -- TODO confirm).
    def p_import20(self, args):
        '''
        stmt ::= importstmt2
        stmt ::= importfrom2
        stmt ::= importstar2
        importstmt2 ::= LOAD_CONST import_as
        importstar2 ::= LOAD_CONST IMPORT_NAME IMPORT_STAR
        importfrom2 ::= LOAD_CONST IMPORT_NAME importlist2 POP_TOP
        importlist2 ::= importlist2 import_as
        importlist2 ::= import_as
        import_as ::= IMPORT_NAME STORE_FAST
        import_as ::= IMPORT_NAME STORE_NAME
        import_as ::= IMPORT_NAME LOAD_ATTR STORE_FAST
        import_as ::= IMPORT_NAME LOAD_ATTR STORE_NAME
        import_as ::= IMPORT_FROM STORE_FAST
        import_as ::= IMPORT_FROM STORE_NAME
        '''
    # 'import_as' can't use designator, since n_import_as()
    # needs to compare both kids' pattr

    # Core statement grammar: statement lists, assignment targets
    # (designators), class definitions and the control-flow statements.
    def p_grammar(self, args):
        '''
        code ::= stmts
        code ::=
        stmts ::= stmts stmt
        stmts ::= stmt
        stmts_opt ::= stmts
        stmts_opt ::= passstmt
        passstmt ::=
        designList ::= designator designator
        designList ::= designator DUP_TOP designList
        designator ::= STORE_FAST
        designator ::= STORE_NAME
        designator ::= STORE_GLOBAL
        designator ::= expr STORE_ATTR
        designator ::= expr STORE_SLICE+0
        designator ::= expr expr STORE_SLICE+1
        designator ::= expr expr STORE_SLICE+2
        designator ::= expr expr expr STORE_SLICE+3
        designator ::= store_subscr
        store_subscr ::= expr expr STORE_SUBSCR
        designator ::= unpack
        designator ::= unpack_list
        stmt ::= classdef
        stmt ::= call_stmt
        call_stmt ::= expr POP_TOP
        stmt ::= return_stmt
        return_stmt ::= expr RETURN_VALUE
        stmt ::= break_stmt
        break_stmt ::= BREAK_LOOP
        stmt ::= continue_stmt
        continue_stmt ::= JUMP_ABSOLUTE
        stmt ::= raise_stmt
        raise_stmt ::= exprlist RAISE_VARARGS
        raise_stmt ::= nullexprlist RAISE_VARARGS
        stmt ::= exec_stmt
        exec_stmt ::= expr exprlist DUP_TOP EXEC_STMT
        exec_stmt ::= expr exprlist EXEC_STMT
        stmt ::= assert
        stmt ::= assert2
        stmt ::= ifstmt
        stmt ::= ifelsestmt
        stmt ::= whilestmt
        stmt ::= whileelsestmt
        stmt ::= forstmt
        stmt ::= forelsestmt
        stmt ::= trystmt
        stmt ::= tryfinallystmt
        stmt ::= DELETE_FAST
        stmt ::= DELETE_NAME
        stmt ::= DELETE_GLOBAL
        stmt ::= expr DELETE_SLICE+0
        stmt ::= expr expr DELETE_SLICE+1
        stmt ::= expr expr DELETE_SLICE+2
        stmt ::= expr expr expr DELETE_SLICE+3
        stmt ::= delete_subscr
        delete_subscr ::= expr expr DELETE_SUBSCR
        stmt ::= expr DELETE_ATTR
        kwarg ::= LOAD_CONST expr
        classdef ::= LOAD_CONST expr mkfunc
                    CALL_FUNCTION_0 BUILD_CLASS STORE_NAME
        classdef ::= LOAD_CONST expr mkfunc
                    CALL_FUNCTION_0 BUILD_CLASS STORE_FAST
        condjmp ::= JUMP_IF_FALSE POP_TOP
        condjmp ::= JUMP_IF_TRUE POP_TOP
        assert ::= expr JUMP_IF_FALSE POP_TOP
                    expr JUMP_IF_TRUE POP_TOP
                    LOAD_GLOBAL RAISE_VARARGS
                    COME_FROM COME_FROM POP_TOP
        assert2 ::= expr JUMP_IF_FALSE POP_TOP
                    expr JUMP_IF_TRUE POP_TOP
                    LOAD_GLOBAL expr RAISE_VARARGS
                    COME_FROM COME_FROM POP_TOP
        ifstmt ::= expr condjmp stmts_opt
                    JUMP_FORWARD COME_FROM POP_TOP
                    COME_FROM
        ifelsestmt ::= expr condjmp stmts_opt
                    JUMP_FORWARD COME_FROM
                    POP_TOP stmts COME_FROM
        trystmt ::= SETUP_EXCEPT stmts_opt
                    POP_BLOCK JUMP_FORWARD
                    COME_FROM except_stmt
        try_end ::= END_FINALLY COME_FROM
        try_end ::= except_else
        except_else ::= END_FINALLY COME_FROM stmts
        except_stmt ::= except_cond except_stmt COME_FROM
        except_stmt ::= except_conds try_end COME_FROM
        except_stmt ::= except try_end COME_FROM
        except_stmt ::= try_end
        except_conds ::= except_cond except_conds COME_FROM
        except_conds ::=
        except_cond ::= except_cond1
        except_cond ::= except_cond2
        except_cond1 ::= DUP_TOP expr COMPARE_OP
                    JUMP_IF_FALSE
                    POP_TOP POP_TOP POP_TOP POP_TOP
                    stmts_opt JUMP_FORWARD COME_FROM
                    POP_TOP
        except_cond2 ::= DUP_TOP expr COMPARE_OP
                    JUMP_IF_FALSE
                    POP_TOP POP_TOP designator POP_TOP
                    stmts_opt JUMP_FORWARD COME_FROM
                    POP_TOP
        except ::= POP_TOP POP_TOP POP_TOP
                    stmts_opt JUMP_FORWARD
        tryfinallystmt ::= SETUP_FINALLY stmts_opt
                    POP_BLOCK LOAD_CONST
                    COME_FROM stmts_opt END_FINALLY
        whilestmt ::= SETUP_LOOP
                    expr JUMP_IF_FALSE POP_TOP
                    stmts_opt JUMP_ABSOLUTE
                    COME_FROM POP_TOP POP_BLOCK COME_FROM
        whileelsestmt ::= SETUP_LOOP
                    expr JUMP_IF_FALSE POP_TOP
                    stmts_opt JUMP_ABSOLUTE
                    COME_FROM POP_TOP POP_BLOCK
                    stmts COME_FROM
        forstmt ::= SETUP_LOOP expr LOAD_CONST
                    FOR_LOOP designator
                    stmts_opt JUMP_ABSOLUTE
                    COME_FROM POP_BLOCK COME_FROM
        forelsestmt ::= SETUP_LOOP expr LOAD_CONST
                    FOR_LOOP designator
                    stmts_opt JUMP_ABSOLUTE
                    COME_FROM POP_BLOCK stmts COME_FROM
        '''

    # Expression grammar: loads, operators, subscripts and slices,
    # boolean and/or, (chained) comparisons and dictionary displays.
    def p_expr(self, args):
        '''
        expr ::= mklambda
        expr ::= mkfunc
        expr ::= SET_LINENO
        expr ::= LOAD_FAST
        expr ::= LOAD_NAME
        expr ::= LOAD_CONST
        expr ::= LOAD_GLOBAL
        expr ::= LOAD_LOCALS
        expr ::= expr LOAD_ATTR
        expr ::= binary_expr
        binary_expr ::= expr expr binary_op
        binary_op ::= BINARY_ADD
        binary_op ::= BINARY_SUBTRACT
        binary_op ::= BINARY_MULTIPLY
        binary_op ::= BINARY_DIVIDE
        binary_op ::= BINARY_MODULO
        binary_op ::= BINARY_LSHIFT
        binary_op ::= BINARY_RSHIFT
        binary_op ::= BINARY_AND
        binary_op ::= BINARY_OR
        binary_op ::= BINARY_XOR
        binary_op ::= BINARY_POWER
        expr ::= binary_subscr
        binary_subscr ::= expr expr BINARY_SUBSCR
        expr ::= expr expr DUP_TOPX_2 BINARY_SUBSCR
        expr ::= cmp
        expr ::= expr UNARY_POSITIVE
        expr ::= expr UNARY_NEGATIVE
        expr ::= expr UNARY_CONVERT
        expr ::= expr UNARY_INVERT
        expr ::= expr UNARY_NOT
        expr ::= mapexpr
        expr ::= expr SLICE+0
        expr ::= expr expr SLICE+1
        expr ::= expr expr SLICE+2
        expr ::= expr expr expr SLICE+3
        expr ::= expr DUP_TOP SLICE+0
        expr ::= expr expr DUP_TOPX_2 SLICE+1
        expr ::= expr expr DUP_TOPX_2 SLICE+2
        expr ::= expr expr expr DUP_TOPX_3 SLICE+3
        expr ::= and
        expr ::= or
        or ::= expr JUMP_IF_TRUE POP_TOP expr COME_FROM
        and ::= expr JUMP_IF_FALSE POP_TOP expr COME_FROM
        cmp ::= cmp_list
        cmp ::= compare
        compare ::= expr expr COMPARE_OP
        cmp_list ::= expr cmp_list1 ROT_TWO POP_TOP
                    COME_FROM
        cmp_list1 ::= expr DUP_TOP ROT_THREE
                    COMPARE_OP JUMP_IF_FALSE POP_TOP
                    cmp_list1 COME_FROM
        cmp_list1 ::= expr DUP_TOP ROT_THREE
                    COMPARE_OP JUMP_IF_FALSE POP_TOP
                    cmp_list2 COME_FROM
        cmp_list2 ::= expr COMPARE_OP JUMP_FORWARD
        mapexpr ::= BUILD_MAP kvlist
        kvlist ::= kvlist kv
        kvlist ::=
        kv ::= DUP_TOP expr ROT_TWO expr STORE_SUBSCR
        exprlist ::= exprlist expr
        exprlist ::= expr
        nullexprlist ::=
        '''

    # Build the node for nonterminal 'nt' from its parsed children.
    # For the list-like nonterminals in 'collect' the second child is
    # appended to the existing first child instead of nesting a new
    # node; everything else is delegated to the base class.
    def nonterminal(self, nt, args):
        collect = ('stmts', 'exprlist', 'kvlist')
        if nt in collect and len(args) > 1:
            #
            # Collect iterated thingies together.
            #
            rv = args[0]
            rv.append(args[1])
        else:
            rv = GenericASTBuilder.nonterminal(self, nt, args)
        return rv

    # NOTE(review): the leading double underscore name-mangles this
    # method, so it does not replace an 'ambiguity' method of the
    # base class.
    def __ambiguity(self, children):
        # only for debugging! to be removed hG/2000-10-15
        print children
        return GenericASTBuilder.ambiguity(self, children)

    # Choose between candidates: when the two candidates are 'funcdef'
    # and 'assign', 'funcdef' wins; all other cases are left to the
    # base class.
    def resolve(self, list):
        if len(list) == 2 and 'funcdef' in list and 'assign' in list:
            return 'funcdef'
        #sys.stderr.writelines( ['resolve ', str(list), '\n'] )
        return GenericASTBuilder.resolve(self, list)
  591. nop = lambda self, args: None
  592. def parse(tokens, customize):
  593. p = Parser()
  594. #
  595. # Special handling for opcodes that take a variable number
  596. # of arguments -- we add a new rule for each:
  597. #
  598. # expr ::= {expr}^n BUILD_LIST_n
  599. # expr ::= {expr}^n BUILD_TUPLE_n
  600. # expr ::= {expr}^n BUILD_SLICE_n
  601. # unpack_list ::= UNPACK_LIST {expr}^n
  602. # unpack ::= UNPACK_TUPLE {expr}^n
  603. # unpack ::= UNPACK_SEQEUENE {expr}^n
  604. # mkfunc ::= {expr}^n LOAD_CONST MAKE_FUNCTION_n
  605. # expr ::= expr {expr}^n CALL_FUNCTION_n
  606. # expr ::= expr {expr}^n CALL_FUNCTION_VAR_n POP_TOP
  607. # expr ::= expr {expr}^n CALL_FUNCTION_VAR_KW_n POP_TOP
  608. # expr ::= expr {expr}^n CALL_FUNCTION_KW_n POP_TOP
  609. #
  610. for k, v in customize.items():
  611. ## avoid adding the same rule twice to this parser
  612. #if p.customized.has_key(k):
  613. # continue
  614. #p.customized[k] = None
  615. #nop = lambda self, args: None
  616. op = k[:string.rfind(k, '_')]
  617. if op in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SLICE'):
  618. rule = 'expr ::= ' + 'expr '*v + k
  619. elif op in ('UNPACK_TUPLE', 'UNPACK_SEQUENCE'):
  620. rule = 'unpack ::= ' + k + ' designator'*v
  621. elif op == 'UNPACK_LIST':
  622. rule = 'unpack_list ::= ' + k + ' designator'*v
  623. elif op == 'DUP_TOPX':
  624. # no need to add a rule
  625. pass
  626. #rule = 'dup_topx ::= ' + 'expr '*v + k
  627. elif op == 'MAKE_FUNCTION':
  628. p.addRule('mklambda ::= %s LOAD_LAMBDA %s' %
  629. ('expr '*v, k), nop)
  630. rule = 'mkfunc ::= %s LOAD_CONST %s' % ('expr '*v, k)
  631. elif op in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR',
  632. 'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'):
  633. na = (v & 0xff) # positional parameters
  634. nk = (v >> 8) & 0xff # keyword parameters
  635. # number of apply equiv arguments:
  636. nak = ( len(op)-len('CALL_FUNCTION') ) / 3
  637. rule = 'expr ::= expr ' + 'expr '*na + 'kwarg '*nk \
  638. + 'expr ' * nak + k
  639. else:
  640. raise 'unknown customize token %s' % k
  641. p.addRule(rule, nop)
  642. ast = p.parse(tokens)
  643. p.cleanup()
  644. return ast
  645. #
  646. # Decompilation (walking AST)
  647. #
  648. # All table-driven. Step 1 determines a table (T) and a path to a
  649. # table key (K) from the node type (N) (other nodes are shown as O):
  650. #
  651. #       N                   N                   N&K
  652. #     / | ... \           / | ... \           / | ... \
  653. #    O  O      O         O  O      K         O  O      O
  654. #              |
  655. #              K
  656. #
  657. #  MAP_R0 (TABLE_R0)   MAP_R (TABLE_R)   MAP_DIRECT (TABLE_DIRECT)
  658. #
  659. # The default is a direct mapping. The key K is then extracted from the
  660. # subtree and used to find a table entry T[K], if any. The result is a
  661. # format string and arguments (a la printf()) for the formatting engine.
  662. # Escapes in the format string are:
  663. #
  664. # %c evaluate N[A] recursively*
  665. # %C evaluate N[A[0]]..N[A[1]] recursively, separate by A[2]*
  666. # %, print ',' if last %C only printed one item (for tuples)
  667. # %| tab to current indentation level
  668. # %+ increase current indentation level
  669. # %- decrease current indentation level
  670. # %{...} evaluate ... in context of N
  671. # %% literal '%'
  672. #
  673. # * indicates an argument (A) required.
  674. #
  675. # The '%' may optionally be followed by a number (C) in square brackets, which
  676. # makes the engine walk down to N[C] before evaluating the escape code.
  677. #
  678. from spark import GenericASTTraversal
# String used for one level of indentation in the generated source.
#TAB = '\t' # as God intended
TAB = ' ' *4 # is less spacy than "\t"

# Format table reached through MAP_R = (TABLE_R, -1): the table key is
# taken from the last child of the node.  Every entry is a tuple of a
# format string followed by the arguments consumed by its escapes
# (see the description of the escapes above).
TABLE_R = {
    'build_tuple2': ( '%C', (0,-1,', ') ),
    'POP_TOP': ( '%|%c\n', 0 ),
    'STORE_ATTR': ( '%c.%[1]{pattr}', 0),
    # 'STORE_SUBSCR': ( '%c[%c]', 0, 1 ),
    'STORE_SLICE+0':( '%c[:]', 0 ),
    'STORE_SLICE+1':( '%c[%c:]', 0, 1 ),
    'STORE_SLICE+2':( '%c[:%c]', 0, 1 ),
    'STORE_SLICE+3':( '%c[%c:%c]', 0, 1, 2 ),
    'JUMP_ABSOLUTE':( '%|continue\n', ),
    'DELETE_SLICE+0':( '%|del %c[:]\n', 0 ),
    'DELETE_SLICE+1':( '%|del %c[%c:]\n', 0, 1 ),
    'DELETE_SLICE+2':( '%|del %c[:%c]\n', 0, 1 ),
    'DELETE_SLICE+3':( '%|del %c[%c:%c]\n', 0, 1, 2 ),
    'DELETE_ATTR': ( '%|del %c.%[-1]{pattr}\n', 0 ),
    #'EXEC_STMT': ( '%|exec %c in %[1]C\n', 0, (0,sys.maxint,', ') ),
    'BINARY_SUBSCR':( '%c[%c]', 0, 1), # required for augmented assign
    'UNARY_POSITIVE':( '+%c', 0 ),
    'UNARY_NEGATIVE':( '-%c', 0 ),
    'UNARY_CONVERT':( '`%c`', 0 ),
    'UNARY_INVERT': ( '~%c', 0 ),
    'UNARY_NOT': ( '(not %c)', 0 ),
    'SLICE+0': ( '%c[:]', 0 ),
    'SLICE+1': ( '%c[%c:]', 0, 1 ),
    'SLICE+2': ( '%c[:%c]', 0, 1 ),
    'SLICE+3': ( '%c[%c:%c]', 0, 1, 2 ),
}

# Format table reached through MAP_R0 = (TABLE_R0, -1, 0).  It holds
# no active entries; all candidates are commented out.
TABLE_R0 = {
    # 'BUILD_LIST': ( '[%C]', (0,-1,', ') ),
    # 'BUILD_TUPLE': ( '(%C)', (0,-1,', ') ),
    # 'CALL_FUNCTION':( '%c(%C)', 0, (1,-1,', ') ),
}
# Format table for the direct mapping (MAP_DIRECT): the table key is
# the type of the node itself.  Every entry is a tuple of a format
# string followed by the arguments consumed by its escapes (see the
# description of the escapes above).
TABLE_DIRECT = {
    # binary operators
    'BINARY_ADD': ( '+' ,),
    'BINARY_SUBTRACT': ( '-' ,),
    'BINARY_MULTIPLY': ( '*' ,),
    'BINARY_DIVIDE': ( '/' ,),
    'BINARY_MODULO': ( '%%',),
    'BINARY_POWER': ( '**',),
    'BINARY_LSHIFT': ( '<<',),
    'BINARY_RSHIFT': ( '>>',),
    'BINARY_AND': ( '&' ,),
    'BINARY_OR': ( '|' ,),
    'BINARY_XOR': ( '^' ,),
    # augmented assignment operators
    'INPLACE_ADD': ( '+=' ,),
    'INPLACE_SUBTRACT': ( '-=' ,),
    'INPLACE_MULTIPLY': ( '*=' ,),
    'INPLACE_DIVIDE': ( '/=' ,),
    'INPLACE_MODULO': ( '%%=',),
    'INPLACE_POWER': ( '**=',),
    'INPLACE_LSHIFT': ( '<<=',),
    'INPLACE_RSHIFT': ( '>>=',),
    'INPLACE_AND': ( '&=' ,),
    'INPLACE_OR': ( '|=' ,),
    'INPLACE_XOR': ( '^=' ,),
    'binary_expr': ( '(%c %c %c)', 0, -1, 1 ),
    # names and attributes
    'IMPORT_FROM': ( '%{pattr}', ),
    'LOAD_ATTR': ( '.%{pattr}', ),
    'LOAD_FAST': ( '%{pattr}', ),
    'LOAD_NAME': ( '%{pattr}', ),
    'LOAD_GLOBAL': ( '%{pattr}', ),
    'LOAD_LOCALS': ( 'locals()', ),
    #'LOAD_CONST': ( '%{pattr}', ), handled below
    'DELETE_FAST': ( '%|del %{pattr}\n', ),
    'DELETE_NAME': ( '%|del %{pattr}\n', ),
    'DELETE_GLOBAL':( '%|del %{pattr}\n', ),
    'delete_subscr':( '%|del %c[%c]\n', 0, 1,),
    'binary_subscr':( '%c[%c]', 0, 1),
    'store_subscr': ( '%c[%c]', 0, 1),
    'STORE_FAST': ( '%{pattr}', ),
    'STORE_NAME': ( '%{pattr}', ),
    'STORE_GLOBAL': ( '%{pattr}', ),
    'unpack': ( '(%C,)', (1, sys.maxint, ', ') ),
    'unpack_list': ( '[%C]', (1, sys.maxint, ', ') ),
    # list comprehensions
    'list_compr': ( '[ %c ]', 1),
    # 'lc_for': ( ' for %c in %c', 3, 0 ),
    'lc_for_nest': ( ' for %c in %c%c', 3, 0, 4 ),
    'lc_if': ( ' if %c', 0 ),
    'lc_body': ( '%c', 1),
    'lc_body__': ( '', ),
    # assignments
    'assign': ( '%|%c = %c\n', -1, 0 ),
    'augassign1': ( '%|%c %c %c\n', 0, 2, 1),
    'augassign2': ( '%|%c%c %c %c\n', 0, 2, -3, -4),
    #'dup_topx': ('%c', 0),
    'designList': ( '%c = %c', 0, -1 ),
    # boolean operators and comparisons
    'and': ( '(%c and %c)', 0, 3 ),
    'or': ( '(%c or %c)', 0, 3 ),
    'compare': ( '(%c %[-1]{pattr} %c)', 0, 1 ),
    'cmp_list': ('%c %c', 0, 1),
    'cmp_list1': ('%[3]{pattr} %c %c', 0, -2),
    'cmp_list2': ('%[1]{pattr} %c', 0),
    # definitions
    'classdef': ( '\n%|class %[0]{pattr[1:-1]}%c:\n%+%{build_class}%-', 1 ),
    'funcdef': ( '\n%|def %c\n', 0),
    'kwarg': ( '%[0]{pattr[1:-1]}=%c', 1),
    # imports
    'importstmt': ( '%|import %[0]{pattr}\n', ),
    'importfrom': ( '%|from %[0]{pattr} import %c\n', 1 ),
    'importlist': ( '%C', (0, sys.maxint, ', ') ),
    'importstmt2': ( '%|import %c\n', 1),
    'importstar2': ( '%|from %[1]{pattr} import *\n', ),
    'importfrom2': ( '%|from %[1]{pattr} import %c\n', 2 ),
    'importlist2': ( '%C', (0, sys.maxint, ', ') ),
    # simple statements
    'assert': ( '%|assert %c\n' , 3 ),
    'assert2': ( '%|assert %c, %c\n' , 3, -5 ),
    'print_stmt': ( '%|print %c,\n', 0 ),
    'print_stmt_nl': ( '%|print %[0]C\n', (0,1, None) ),
    'print_nl_stmt': ( '%|print\n', ),
    'print_to': ( '%|print >> %c, %c,\n', 0, 1 ),
    'print_to_nl': ( '%|print >> %c, %c\n', 0, 1 ),
    'print_nl_to': ( '%|print >> %c\n', 0 ),
    'print_to_items': ( '%C', (0, 2, ', ') ),
    'call_stmt': ( '%|%c\n', 0),
    'break_stmt': ( '%|break\n', ),
    'continue_stmt':( '%|continue\n', ),
    'raise_stmt': ( '%|raise %[0]C\n', (0,sys.maxint,', ') ),
    'return_stmt': ( '%|return %c\n', 0),
    'return_lambda': ( '%c', 0),
    # compound statements
    'ifstmt': ( '%|if %c:\n%+%c%-', 0, 2 ),
    'ifelsestmt': ( '%|if %c:\n%+%c%-%|else:\n%+%c%-', 0, 2, -2 ),
    'ifelifstmt': ( '%|if %c:\n%+%c%-%c', 0, 2, -2 ),
    'elifelifstmt': ( '%|elif %c:\n%+%c%-%c', 0, 2, -2 ),
    'elifstmt': ( '%|elif %c:\n%+%c%-', 0, 2 ),
    'elifelsestmt': ( '%|elif %c:\n%+%c%-%|else:\n%+%c%-', 0, 2, -2 ),
    'whilestmt': ( '%|while %c:\n%+%c%-\n', 1, 4 ),
    'whileelsestmt':( '%|while %c:\n%+%c%-\n%|else:\n%+%c%-\n', 1, 4, 9 ),
    'forstmt': ( '%|for %c in %c:\n%+%c%-\n', 4, 1, 5 ),
    'forelsestmt': (
        '%|for %c in %c:\n%+%c%-\n%|else:\n%+%c%-\n', 4, 1, 5, 9
    ),
    'trystmt': ( '%|try:\n%+%c%-%c', 1, 5 ),
    'except': ( '%|except:\n%+%c%-', 3 ),
    'except_cond1': ( '%|except %c:\n%+%c%-', 1, 8 ),
    'except_cond2': ( '%|except %c, %c:\n%+%c%-', 1, 6, 8 ),
    'except_else': ( '%|else:\n%+%c%-', 2 ),
    'tryfinallystmt':( '%|try:\n%+%c%-\n%|finally:\n%+%c%-\n', 1, 5 ),
    'passstmt': ( '%|pass\n', ),
    # NOTE(review): 'STORE_FAST' already appears further up with the
    # same value; this second entry is redundant.
    'STORE_FAST': ( '%{pattr}', ),
    # dictionary displays
    'kv': ( '%c: %c', 3, 1 ),
    'mapexpr': ( '{%[1]C}', (0,sys.maxint,', ') ),
}
# Each MAP_* tuple names a format table followed by the path of child
# indices leading from a node to the node that supplies the table key
# (see the diagram above): no path = the node itself, (-1) = its last
# child, (-1, 0) = the first child of its last child.
MAP_DIRECT = (TABLE_DIRECT, )
MAP_R0 = (TABLE_R0, -1, 0)
MAP_R = (TABLE_R, -1)

# Node types that do not use the default direct mapping.
MAP = {
    'stmt': MAP_R,
    'designator': MAP_R,
    'expr': MAP_R,
    'exprlist': MAP_R0,
}
  829. ASSIGN_TUPLE_PARAM = lambda param_name: \
  830. AST('expr', [ Token('LOAD_FAST', pattr=param_name) ])
  831. def get_tuple_parameter(ast, name):
  832. """
  833. If the name of the formal parameter starts with dot,
  834. it's a tuple parameter, like this:
  835. def MyFunc(xx, (a,b,c), yy):
  836. print a, b*2, c*42
  837. In byte-code, the whole tuple is assigned to parameter '.1' and
  838. then the tuple gets unpacked to 'a', 'b' and 'c'.
  839. Since identifiers starting with a dot are illegal in Python,
  840. we can search for the byte-code equivalent to '(a,b,c) = .1'
  841. """
  842. assert ast == 'code' and ast[0] == 'stmts'
  843. for i in xrange(len(ast[0])):
  844. # search for an assign-statement
  845. assert ast[0][i] == 'stmt'
  846. node = ast[0][i][0]
  847. if node == 'assign' \
  848. and node[0] == ASSIGN_TUPLE_PARAM(name):
  849. # okay, this assigns '.n' to something
  850. del ast[0][i]
  851. # walk lhs; this
  852. # returns a tuple of identifiers as used
  853. # within the function definition
  854. assert node[1] == 'designator'
  855. # if lhs is not a UNPACK_TUPLE (or equiv.),
  856. # add parenteses to make this a tuple
  857. if node[1][0] not in ('unpack', 'unpack_list'):
  858. return '(' + walk(node[1]) + ')'
  859. return walk(node[1])
  860. raise "Can't find tuple parameter" % name
  861. def make_function(self, code, defparams, isLambda, nested=1):
  862. """Dump function defintion, doc string, and function body."""
  863. def build_param(ast, name, default):
  864. """build parameters:
  865. - handle defaults
  866. - handle format tuple parameters
  867. """
  868. # if formal parameter is a tuple, the paramater name
  869. # starts with a dot (eg. '.1', '.2')
  870. if name[0] == '.':
  871. # replace the name with the tuple-string
  872. name = get_tuple_parameter(ast, name)
  873. if default:
  874. if Showast:
  875. print '--', name
  876. print default
  877. print '--'
  878. result = '%s = %s' % ( name, walk(default, indent=0) )
  879. ##w = Walk(default, 0)
  880. ##result = '%s = %s' % ( name, w.traverse() )
  881. ##del w # hg/2000-09-03
  882. if result[-2:] == '= ': # default was 'LOAD_CONST None'
  883. result = result + 'None'
  884. return result
  885. else:
  886. return name
  887. def writeParams(self, params):
  888. for i in range(len(params)):
  889. if i > 0: self.f.write(', ')
  890. self.f.write(params[i])
  891. assert type(code) == types.CodeType
  892. code = Code(code)
  893. #assert isinstance(code, Code)
  894. ast = _build_ast(self.f, code._tokens, code._customize)
  895. code._tokens = None # save memory
  896. assert ast == 'code' and ast[0] == 'stmts'
  897. if isLambda:
  898. # convert 'return' statement to expression
  899. #assert len(ast[0]) == 1 wrong, see 'lambda (r,b): r,b,g'
  900. assert ast[-1][-1] == 'stmt'
  901. assert len(ast[-1][-1]) == 1
  902. assert ast[-1][-1][0] == 'return_stmt'
  903. ast[-1][-1][0].type = 'return_lambda'
  904. else:
  905. if ast[0][-1] == RETURN_NONE:
  906. # Python adds a 'return None' to the
  907. # end of any function; remove it
  908. ast[0].pop() # remove last node
  909. # add defaults values to parameter names
  910. argc = code.co_argcount
  911. paramnames = list(code.co_varnames[:argc])
  912. # defaults are for last n parameters, thus reverse
  913. paramnames.reverse(); defparams.reverse()
  914. # build parameters
  915. #
  916. ##This would be a nicer piece of code, but I can't get this to work
  917. ## now, have to find a usable lambda constuct hG/2000-09-05
  918. ##params = map(lambda name, default: build_param(ast, name, default),
  919. ## paramnames, defparams)
  920. params = []
  921. for name, default in map(lambda a,b: (a,b), paramnames, defparams):
  922. params.append( build_param(ast, name, default) )
  923. params.reverse() # back to correct order
  924. if 4 & code.co_flags: # flag 2 -> variable number of args
  925. params.append('*%s' % code.co_varnames[argc])
  926. argc = argc +1
  927. if 8 & code.co_flags: # flag 3 -> keyword args
  928. params.append('**%s' % code.co_varnames[argc])
  929. argc = argc +1
  930. # dump parameter list (with default values)
  931. indent = TAB * self.indent
  932. if isLambda:
  933. self.f.write('lambda ')
  934. writeParams(self, params)
  935. self.f.write(': ')
  936. else:
  937. self.f.write('(')
  938. writeParams(self, params)
  939. self.f.write('):\n')
  940. #self.f.write('%s#flags:\t%i\n' % (indent, code.co_flags))
  941. if code.co_consts[0] != None: # docstring exists, dump it
  942. self.f.writelines([indent, `code.co_consts[0]`, '\n'])
  943. _gen_source(self.f, ast, code._customize, self.indent,
  944. isLambda=isLambda)
  945. code._tokens = None; code._customize = None # save memory
  946. def build_class(self, code):
  947. """Dump class definition, duc string and class body."""
  948. assert type(code) == types.CodeType
  949. code = Code(code)
  950. #assert isinstance(code, Code)
  951. indent = TAB * self.indent
  952. #self.f.write('%s#flags:\t%i\n' % (indent, code.co_flags))
  953. ast = _build_ast(self.f, code._tokens, code._customize)
  954. code._tokens = None # save memory
  955. assert ast == 'code' and ast[0] == 'stmts'
  956. # if docstring exists, dump it
  957. if code.co_consts[0] != None \
  958. and ast[0][0] == ASSIGN_DOC_STRING(code.co_consts[0]):
  959. #print '\n\n>>-->>doc string set\n\n'
  960. self.f.writelines( [indent,repr(code.co_consts[0]), '\n'] )
  961. del ast[0][0]
  962. # the function defining a class normally returns locals(); we
  963. # don't want this to show up in the source, thus remove the node
  964. if ast[0][-1] == RETURN_LOCALS:
  965. ast[0].pop() # remove last node
  966. _gen_source(self.f, ast, code._customize, self.indent)
  967. code._tokens = None; code._customize = None # save memory
  968. __globals_tokens__ = ('STORE_GLOBAL', 'DELETE_GLOBAL') # 'LOAD_GLOBAL'
  969. def find_globals(node, globals):
  970. """Find globals in this statement."""
  971. for n in node:
  972. if isinstance(n, AST):
  973. if n != 'stmt': # skip nested statements
  974. globals = find_globals(n, globals)
  975. elif n.type in __globals_tokens__:
  976. globals[n.pattr] = None
  977. return globals
  978. class Walk(GenericASTTraversal):
  979. def __init__(self, ast, indent=0, isLambda=0):
  980. GenericASTTraversal.__init__(self, ast)
  981. self._globals = {}
  982. self.f = cStringIO.StringIO()
  983. self.f.seek(0)
  984. self.indent = indent
  985. self.isLambda = isLambda
  986. def __del__(self):
  987. self.f.close()
  988. def traverse(self, node=None):
  989. self.preorder(node)
  990. return self.f.getvalue()
  991. def n_LOAD_CONST(self, node):
  992. data = node.pattr
  993. if data == 'None':
  994. # LOAD_CONST 'None' only occurs, when None is
  995. # implicit eg. in 'return' w/o params
  996. pass
  997. elif data == 'Ellipsis':
  998. self.f.write('...')
  999. elif data[0] == '-': # assume negative integer constant
  1000. # convert to hex, since decimal representation
  1001. # would result in 'LOAD_CONST; UNARY_NEGATIVE'
  1002. self.f.write('0x%x' % int(data))
  1003. else:
  1004. self.f.write(data)
  1005. def n_delete_subscr(self, node):
  1006. #print >>self.f, '>#', node
  1007. #print >>self.f, '---'
  1008. maybe_tuple = node[-2][-1]
  1009. #print >>self.f, '##', maybe_tuple, maybe_tuple.type[:11]
  1010. if maybe_tuple.type[:11] == 'BUILD_TUPLE':
  1011. maybe_tuple.type = 'build_tuple2'
  1012. #print >>self.f, '##', node
  1013. #print >>self.f, '##', maybe_tuple.type
  1014. self.default(node)
  1015. n_store_subscr = n_binary_subscr = n_delete_subscr
  1016. def __n_stmts(self, node):
  1017. # optimize "print 1, ; print"
  1018. last = None; i = 0
  1019. while i < len(node):
  1020. n = node[i]
  1021. assert(n == 'stmt')
  1022. if n[0] == 'print_nl_stmt' and \
  1023. last is not None and \
  1024. last[0] == 'print_stmt':
  1025. last[0].type = 'print_stmt_nl'
  1026. del node[i]
  1027. last = None
  1028. else:
  1029. last = n
  1030. i = i + 1
  1031. self.default(node)
  1032. def n_stmt(self, node):
  1033. if not self.isLambda:
  1034. indent = TAB * self.indent
  1035. for g in find_globals(node, {}).keys():
  1036. self.f.writelines( [indent,
  1037. 'global ',
  1038. g, '\n'] )
  1039. ## nice output does not work since engine()
  1040. ## creates a new Walk instance when recursing
  1041. ## TODO: reconsider this: engine() no longer
  1042. ## creates a new Walk instancew hG/2000-12-31
  1043. ## if not self._globals.has_key(g):
  1044. ## self._globals[g] = None
  1045. ## self.f.writelines( [TAB * self.indent,
  1046. ## 'global ',
  1047. ## g, '\n'] )
  1048. self.default(node)
  1049. def n_exec_stmt(self, node):
  1050. """
  1051. exec_stmt ::= expr exprlist DUP_TOP EXEC_STMT
  1052. exec_stmt ::= expr exprlist EXEC_STMT
  1053. """
  1054. w = Walk(node, indent=self.indent)
  1055. w.engine(( '%|exec %c in %[1]C', 0, (0,sys.maxint,', ') ),
  1056. node)
  1057. s = w.f.getvalue()
  1058. del w
  1059. if s[-3:] == 'in ':
  1060. s = s[:-3]
  1061. self.f.writelines( [s, '\n'] )
  1062. node[:] = [] # avoid print out when recursive descenting
  1063. def n_ifelsestmt(self, node, preprocess=0):
  1064. if len(node[-2]) == 1:
  1065. ifnode = node[-2][0][0]
  1066. if ifnode == 'ifelsestmt':
  1067. node.type = 'ifelifstmt'
  1068. self.n_ifelsestmt(ifnode, preprocess=1)
  1069. if ifnode == 'ifelifstmt':
  1070. ifnode.type = 'elifelifstmt'
  1071. elif ifnode == 'ifelsestmt':
  1072. ifnode.type = 'elifelsestmt'
  1073. elif ifnode == 'ifstmt':
  1074. node.type = 'ifelifstmt'
  1075. ifnode.type = 'elifstmt'
  1076. if not preprocess:
  1077. self.default(node)
  1078. def n_import_as(self, node):
  1079. iname = node[0].pattr; sname = node[-1].pattr
  1080. if iname == sname \
  1081. or iname[:len(sname)+1] == (sname+'.'):
  1082. self.f.write(iname)
  1083. else:
  1084. self.f.writelines([iname, ' as ', sname])
  1085. node[:] = [] # avoid print out when recursive descenting
  1086. def n_mkfunc(self, node):
  1087. defparams = node[0:-2]
  1088. code = node[-2].attr
  1089. node[:] = [] # avoid print out when recursive descenting
  1090. self.indent = self.indent + 1
  1091. self.f.write(code.co_name)
  1092. make_function(self, code, defparams, isLambda=0)
  1093. self.indent = self.indent - 1
  1094. def n_mklambda(self, node):
  1095. defparams = node[0:-2]
  1096. code = node[-2].attr
  1097. node[:] = [] # avoid print out when recursive descenting
  1098. make_function(self, code, defparams, isLambda=1)
  1099. def n_classdef(self, node):
  1100. self.f.writelines(['\n', TAB * self.indent, 'class '])
  1101. self.f.write(node[0].pattr[1:-1])
  1102. node._code = node[-4][0].attr
  1103. # avoid print out when recursive descenting
  1104. if node[1] == BUILD_TUPLE_0:
  1105. node[:] = []
  1106. else:
  1107. node[:] = [ node[1] ]
  1108. def n_classdef_exit(self, node):
  1109. self.f.write(':\n')
  1110. self.indent = self.indent +1
  1111. # '\n%|class %[0]{pattr[1:-1]}%c:\n%+%{build_class}%-', 1 ),
  1112. # -4 -> MAKE_FUNCTION; -2 -> LOAD_CONST (code)
  1113. build_class(self,node._code)
  1114. self.indent = self.indent -1
  1115. node._code = None # save memory
  1116. def n_lc_for(self, node):
  1117. node.type = 'lc_for_nest'
  1118. content = node[4]
  1119. while content == 'lc_for':
  1120. content.type = 'lc_for_nest'
  1121. content = content[4]
  1122. while content == 'lc_if':
  1123. content = content[2]
  1124. assert content == 'lc_body'
  1125. self.preorder(content)
  1126. content.type = 'lc_body__'
  1127. self.default(node)
  1128. def engine(self, entry, startnode):
  1129. #self.f.write("-----\n")
  1130. #self.f.write(str(startnode.__dict__)); self.f.write('\n')
  1131. escape = re.compile(r'''
  1132. % ( \[ (?P<child> -? \d+ ) \] )?
  1133. ((?P<type> [^{] ) |
  1134. ( [{] (?P<expr> [^}]* ) [}] ))
  1135. ''', re.VERBOSE)
  1136. fmt = entry[0]
  1137. n = len(fmt)
  1138. lastC = 0
  1139. arg = 1
  1140. i = 0
  1141. while i < n:
  1142. m = escape.match(fmt, i)
  1143. if m is None:
  1144. self.f.write(fmt[i])
  1145. i = i + 1
  1146. continue
  1147. i = m.end()
  1148. typ = m.group('type') or '{'
  1149. node = startnode
  1150. try:
  1151. if m.group('child'):
  1152. node = node[string.atoi(m.group('child'))]
  1153. except:
  1154. print node.__dict__
  1155. raise
  1156. if typ == '%':
  1157. self.f.write('%')
  1158. elif typ == '+':
  1159. self.indent = self.indent + 1
  1160. elif typ == '-':
  1161. self.indent = self.indent - 1
  1162. elif typ == '|':
  1163. self.f.write(TAB * self.indent)
  1164. elif typ == ',':
  1165. if lastC == 1:
  1166. self.f.write(',')
  1167. elif typ == 'c':
  1168. self.traverse(node[entry[arg]])
  1169. ##w = Walk(node[entry[arg]], self.indent)
  1170. ##self.f.write(w.traverse())
  1171. ##del w # hg/2000-09-03
  1172. arg = arg + 1
  1173. elif typ == 'C':
  1174. low, high, sep = entry[arg]
  1175. lastC = remaining = len(node[low:high])
  1176. for subnode in node[low:high]:
  1177. self.traverse(subnode)
  1178. ##w = Walk(subnode, self.indent)
  1179. ##self.f.write(w.traverse())
  1180. ##del w # hg/2000-09-03
  1181. remaining = remaining - 1
  1182. if remaining > 0:
  1183. self.f.write(sep)
  1184. arg = arg + 1
  1185. elif typ == '{':
  1186. d = node.__dict__
  1187. expr = m.group('expr')
  1188. if expr == 'build_class':
  1189. # -4 -> MAKE_FUNCTION; -2 -> LOAD_CONST (code)
  1190. build_class(self,node[-4][-2].attr)
  1191. else:
  1192. try:
  1193. self.f.write(eval(expr, d, d))
  1194. except:
  1195. print node
  1196. raise
  1197. def default(self, node):
  1198. mapping = MAP.get(node, MAP_DIRECT)
  1199. table = mapping[0]
  1200. key = node
  1201. for i in mapping[1:]:
  1202. key = key[i]
  1203. if table.has_key(key):
  1204. self.engine(table[key], node)
  1205. self.prune()
  1206. def walk(ast, customize={}, indent=0, isLambda=0):
  1207. w = Walk(ast, indent, isLambda=isLambda)
  1208. #
  1209. # Special handling for opcodes that take a variable number
  1210. # of arguments -- we add a new entry for each in TABLE_R.
  1211. #
  1212. for k, v in customize.items():
  1213. op = k[:string.rfind(k, '_')]
  1214. if op == 'BUILD_LIST':
  1215. TABLE_R[k] = ( '[%C]', (0,-1,', ') )
  1216. elif op == 'BUILD_SLICE':
  1217. TABLE_R[k] = ( '%C', (0,-1,':') )
  1218. elif op == 'BUILD_TUPLE':
  1219. TABLE_R[k] = ( '(%C%,)', (0,-1,', ') )
  1220. elif op == 'CALL_FUNCTION':
  1221. TABLE_R[k] = ( '%c(%C)', 0, (1,-1,', ') )
  1222. elif op in ('CALL_FUNCTION_VAR',
  1223. 'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'):
  1224. if v == 0:
  1225. str = '%c(%C' # '%C' is a dummy here ...
  1226. p2 = (0, 0, None) # .. because of this
  1227. else:
  1228. str = '%c(%C, '
  1229. p2 = (1,-2, ', ')
  1230. if op == 'CALL_FUNCTION_VAR':
  1231. str = str + '*%c)'
  1232. entry = (str, 0, p2, -2)
  1233. elif op == 'CALL_FUNCTION_KW':
  1234. str = str + '**%c)'
  1235. entry = (str, 0, p2, -2)
  1236. else:
  1237. str = str + '*%c, **%c)'
  1238. if p2[2]: p2 = (1,-3, ', ')
  1239. entry = (str, 0, p2, -3, -2)
  1240. TABLE_R[k] = entry
  1241. result = w.traverse()
  1242. return result
  1243. #-- end of (de-)compiler ---
  1244. #-- start
# Debug switches; decompyle() sets both from its showasm/showast
# arguments on every call.
Showasm = 0     # true: _tokenize() writes the token list to its stream
Showast = 0     # true: _gen_source() writes the repr of the AST first
# Output stream of the latest decompyle() call; _build_ast() writes the
# token list there when parsing fails.
__real_out = None
  1248. def _tokenize(out, co):
  1249. """Disassemble code object into a token list"""
  1250. assert type(co) == types.CodeType
  1251. tokens, customize = disassemble(co)
  1252. # See the disassembly..
  1253. if Showasm and out is not None:
  1254. for t in tokens:
  1255. out.write('%s\n' % t)
  1256. out.write('\n')
  1257. return tokens, customize
  1258. def _build_ast(out, tokens, customize):
  1259. assert type(tokens) == types.ListType
  1260. assert isinstance(tokens[0], Token)
  1261. # Build AST from disassembly.
  1262. try:
  1263. ast = parse(tokens, customize)
  1264. except: # parser failed, dump disassembly
  1265. #if not Showasm:
  1266. __real_out.write('--- This code section failed: ---\n')
  1267. for t in tokens:
  1268. __real_out.write('%s\n' % t)
  1269. __real_out.write('\n')
  1270. raise
  1271. return ast
  1272. def _gen_source(out, ast, customize, indent=0, isLambda=0):
  1273. """convert AST to source code"""
  1274. if Showast:
  1275. out.write(`ast`)
  1276. # if code would be empty, append 'pass'
  1277. if len(ast[0]) == 0:
  1278. out.write(indent * TAB)
  1279. out.write('pass\n')
  1280. else:
  1281. out.write(walk(ast, customize, indent, isLambda=isLambda))
  1282. def decompyle(co, out=None, indent=0, showasm=0, showast=0):
  1283. """
  1284. diassembles a given code block 'co'
  1285. """
  1286. assert type(co) == types.CodeType
  1287. global Showasm, Showast
  1288. Showasm = showasm
  1289. Showast = showast
  1290. if not out:
  1291. out = sys.stdout
  1292. global __real_out
  1293. __real_out = out # store final output stream for case of error
  1294. tokens, customize = _tokenize(out, co)
  1295. ast = _build_ast(out, tokens, customize)
  1296. tokens = None # save memory
  1297. assert ast == 'code' and ast[0] == 'stmts'
  1298. # convert leading '__doc__ = "..." into doc string
  1299. if ast[0][0] == ASSIGN_DOC_STRING(co.co_consts[0]):
  1300. out.writelines( [repr(co.co_consts[0]), '\n'] )
  1301. del ast[0][0]
  1302. if ast[0][-1] == RETURN_NONE:
  1303. ast[0].pop() # remove last node
  1304. #todo: if empty, add 'pass'
  1305. _gen_source(out, ast, customize, indent)
  1306. def decompyle_file(filename, outstream=None, showasm=0, showast=0):
  1307. """
  1308. decompile Python byte-code file (.pyc)
  1309. """
  1310. co = _load_module(filename)
  1311. decompyle(co, out=outstream, showasm=showasm, showast=showast)
  1312. co = None