PageRenderTime 63ms CodeModel.GetById 30ms RepoModel.GetById 1ms app.codeStats 0ms

/JavaScript Libraries/mcarter-js.io-c990d6f/compilers/pyjsiocompile/pyjsiocompile/compile.py

https://bitbucket.org/gwbasic/objectcloud
Python | 569 lines | 509 code | 31 blank | 29 comment | 68 complexity | ee82ac654716c274fc42df319497fda7 MD5 | raw file
Possible License(s): JSON, LGPL-2.1, MPL-2.0-no-copyleft-exception
  1. import logging
  2. import os
  3. import sys
  4. from urllib2 import urlopen
  5. fileopen = open
  6. from BeautifulSoup import BeautifulSoup as Soup
  7. try:
  8. import json
  9. except:
  10. import simplejson as json
# Module-level logger. It starts at WARN and writes through a StreamHandler;
# main() later re-applies the level chosen with --v / --vv.
log = logging.getLogger(__name__)
log.setLevel(logging.WARN)
log.addHandler(logging.StreamHandler())
  14. def make_option_parser():
  15. from optparse import OptionParser
  16. parser = OptionParser("usage: %prog [options] inputfile")
  17. parser.add_option("-j", "--jsio-path",
  18. dest="jsio", type="string",
  19. default="http://js.io/svn/js.io/trunk/jsio",
  20. help="jsio source path")
  21. parser.add_option("-o", "--output",
  22. dest="output", type="string",
  23. default="output.js",
  24. help="output FILENAME", metavar="FILENAME")
  25. parser.add_option("-e", "--environment",
  26. dest="environment", type="string", default="browser",
  27. help="target environment (e.g. browser or node)")
  28. parser.add_option("-t", "--transport",
  29. dest="transport", type="string",
  30. default="csp",
  31. help="target transport (e.g. csp or tcp)")
  32. parser.add_option("--v",
  33. action="store_const", const=logging.INFO, dest="verbose")
  34. parser.add_option("--vv",
  35. action="store_const", const=logging.DEBUG, dest="verbose")
  36. parser.add_option("-d", "--dont-compress",
  37. action="store_false", dest="minify", default=True,
  38. help="Don't minify the output")
  39. return parser
  40. def main(argv=None):
  41. if argv == None:
  42. argv = sys.argv[1:]
  43. parser = make_option_parser()
  44. (options, args) = parser.parse_args(argv)
  45. log.debug(options)
  46. log.setLevel(options.verbose or logging.WARN)
  47. if len(args) != 1:
  48. print "Invalid position arguments"
  49. parser.print_help()
  50. sys.exit(1)
  51. INPUT = args[0]
  52. OUTPUT = options.output
  53. BASEDIR = os.path.dirname(INPUT)
  54. if INPUT.split('.')[-1] not in ('html', 'js', 'pkg'):
  55. print "Invalid input file; jsio_compile only operats on .js and .html files"
  56. sys.exit(1)
  57. if INPUT.endswith('.pkg'):
  58. pkg_data = json.loads(get_source(INPUT))
  59. pkg_data['root'] = str(pkg_data['root'])
  60. target = join_paths(BASEDIR, pkg_data['root'] + '.js')
  61. output = compile_source(target, options, extras=[pkg_data['root']])
  62. output += '\njsio("import %s");\ndelete jsio;\n' % (pkg_data['root'])
  63. else:
  64. output = compile_source(INPUT, options)
  65. if options.minify:
  66. log.info("Minifying")
  67. output = minify(output)
  68. else:
  69. log.info("Skipping minify")
  70. print "Writing output %s" % OUTPUT
  71. f = fileopen(OUTPUT, 'w')
  72. f.write(output)
  73. f.close()
  74. def join_paths(*paths):
  75. if '://' in paths[0]:
  76. return '/'.join(paths)
  77. else:
  78. return os.path.join(*paths)
  79. def minify(src):
  80. import StringIO
  81. jsm = JavascriptMinify()
  82. o = StringIO.StringIO()
  83. jsm.minify(StringIO.StringIO(src), o)
  84. return o.getvalue()
  85. def get_source(target):
  86. log.debug('fetching source from %s', target)
  87. if '://' in target:
  88. return urlopen(target).read()
  89. else:
  90. return fileopen(target).read()
  91. def compile_source(target, options, extras=[]):
  92. log.info('compiling %s', target)
  93. orig_source = get_source(target)
  94. if target.endswith('.html'):
  95. soup = Soup(orig_source)
  96. orig_source = ""
  97. for script in select(soup, 'script'):
  98. if 'src' in dict(script.attrs):
  99. continue
  100. target += script.contents[0]
  101. target_source = remove_comments(target)
  102. target_module = os.path.relpath(target).split('/')[-1].split('.')[0]
  103. env_path = 'jsio.env.' + options.environment + '.' + options.transport
  104. checked = [target_module,
  105. 'jsio', 'jsio.env', env_path,
  106. 'log', 'Class', 'bind']
  107. dependancies = map(lambda x: (x, ''),
  108. (extract_dependancies(target_source) + extras))
  109. env = remove_comments(get_source(join_paths(options.jsio, 'env',
  110. options.environment,
  111. options.transport + '.js')))
  112. dependancies.extend(map(lambda x: \
  113. (x, 'jsio.env.%s.' % options.environment),
  114. extract_dependancies(env)))
  115. log.debug('checked is %s', checked)
  116. while dependancies:
  117. pkg, path = dependancies.pop(0)
  118. full_path = joinModulePath(path, pkg)
  119. log.debug('full_path: %s', full_path)
  120. if full_path in checked:
  121. continue
  122. log.debug('checking dependancy %s', full_path)
  123. target = path_for_module(full_path, prefix=options.jsio)
  124. src = remove_comments(get_source(target))
  125. depends = map(lambda x: (x, full_path), extract_dependancies(src))
  126. dependancies.extend(depends)
  127. checked.append(full_path)
  128. sources = {}
  129. log.debug('checked is %s', checked)
  130. for full_path in checked:
  131. if full_path in (target_module, 'jsio', # 'jsio.env',
  132. 'log', 'Class', 'bind'):
  133. continue
  134. log.info("Loading dependancy %s", full_path)
  135. filename = path_for_module(full_path, prefix=options.jsio)
  136. src = get_source(filename)
  137. virtual_filename = path_for_module(full_path, prefix='jsio')
  138. log.debug(virtual_filename)
  139. sources[full_path] = {'src': minify(src), 'url': virtual_filename, }
  140. out = ',\n'.join([ repr(str(key)) + ": " + json.dumps(val)
  141. for (key, val) in sources.items() ])
  142. jsio_src = get_source(join_paths(options.jsio, 'jsio.js'))
  143. final_output = \
  144. jsio_src.replace(" // Insert pre-loaded modules here...", out)
  145. return final_output
  146. def path_for_module(full_path, prefix):
  147. path_components = full_path.split('.')
  148. if full_path == 'jsio':
  149. path_components = [prefix, full_path]
  150. elif (path_components[0] == 'jsio'):
  151. path_components[0] = prefix
  152. log.debug(path_components)
  153. return join_paths(*path_components) + '.js'
  154. def joinModulePath(a, b):
  155. if b[0] != '.':
  156. return b
  157. segments = a.split('.')
  158. while b[0] == '.':
  159. b = b[1:]
  160. segments.pop()
  161. output = '.'.join(segments) + '.' + b
  162. if output[0] == '.':
  163. output = output[1:]
  164. return output
  165. def extract_dependancies(src):
  166. dependancies = []
  167. re1 = re.compile("jsio\(\s*['\"]\s*(from|external)\s+([\w.$]+)\s+import\s+(.*?)\s*['\"]\s*\)")
  168. for item in re1.finditer(src):
  169. dependancies.append(item.groups()[1])
  170. re2 = re.compile("jsio\(\s*['\"]\s*import\s+(.*?)\s*['\"]\s*\)")
  171. re3 = re.compile("\s*([\w.$]+)(?:\s+as\s+([\w.$]+))?,?")
  172. for item in re2.finditer(src):
  173. for listItem in re3.finditer(item.groups()[0]):
  174. dependancies.append(listItem.groups()[0])
  175. return dependancies
  176. def remove_comments(src):
  177. output = ""
  178. i = 0
  179. while True:
  180. j = src.find('/*', i)
  181. if j == -1:
  182. output += src[i:]
  183. break
  184. output += src[i:j]
  185. k = src.find('*/', i)
  186. if k == -1:
  187. print 'unterminated comment detected'
  188. sys.exit(0)
  189. i = k+2
  190. output2 = ""
  191. for line in output.split('\n'):
  192. # XXX: Won't quite work with strings...
  193. line = line.split('//')[0]
  194. if line:
  195. output2 += line + '\n'
  196. return output2
  197. """
  198. soupselect.py
  199. CSS selector support for BeautifulSoup.
  200. soup = BeautifulSoup('<html>...')
  201. select(soup, 'div')
  202. - returns a list of div elements
  203. select(soup, 'div#main ul a')
  204. - returns a list of links inside a ul inside div#main
  205. """
  206. import re
# A plain tag selector: lowercase letters and digits only.
tag_re = re.compile('^[a-z0-9]+$')

# An attribute selector such as a[href], [lang|="en"] or input[type="text"].
# Named groups: tag (optional), attribute, operator (optional), value.
attribselect_re = re.compile(
    r'^(?P<tag>\w+)?\[(?P<attribute>\w+)(?P<operator>[=~\|\^\$\*]?)' +
    r'=?"?(?P<value>[^\]"]*)"?\]$'
)
# /^(\w+)\[(\w+)([=~\|\^\$\*]?)=?"?([^\]"]*)"?\]$/
#   \---/  \---/\-------------/    \-------/
#     |      |         |               |
#     |      |         |           The value
#     |      |    ~,|,^,$,* or =
#     |   Attribute
#    Tag
  219. def attribute_checker(operator, attribute, value=''):
  220. """
  221. Takes an operator, attribute and optional value; returns a function that
  222. will return True for elements that match that combination.
  223. """
  224. return {
  225. '=': lambda el: el.get(attribute) == value,
  226. # attribute includes value as one of a set of space separated tokens
  227. '~': lambda el: value in el.get(attribute, '').split(),
  228. # attribute starts with value
  229. '^': lambda el: el.get(attribute, '').startswith(value),
  230. # attribute ends with value
  231. '$': lambda el: el.get(attribute, '').endswith(value),
  232. # attribute contains value
  233. '*': lambda el: value in el.get(attribute, ''),
  234. # attribute is either exactly value or starts with value-
  235. '|': lambda el: el.get(attribute, '') == value \
  236. or el.get(attribute, '').startswith('%s-' % value),
  237. }.get(operator, lambda el: el.has_key(attribute))
  238. def select(soup, selector):
  239. """
  240. soup should be a BeautifulSoup instance; selector is a CSS selector
  241. specifying the elements you want to retrieve.
  242. """
  243. tokens = selector.split()
  244. current_context = [soup]
  245. for token in tokens:
  246. m = attribselect_re.match(token)
  247. if m:
  248. # Attribute selector
  249. tag, attribute, operator, value = m.groups()
  250. if not tag:
  251. tag = True
  252. checker = attribute_checker(operator, attribute, value)
  253. found = []
  254. for context in current_context:
  255. found.extend([el for el in context.findAll(tag) if checker(el)])
  256. current_context = found
  257. continue
  258. if '#' in token:
  259. # ID selector
  260. tag, id = token.split('#', 1)
  261. if not tag:
  262. tag = True
  263. el = current_context[0].find(tag, {'id': id})
  264. if not el:
  265. return [] # No match
  266. current_context = [el]
  267. continue
  268. if '.' in token:
  269. # Class selector
  270. tag, klass = token.split('.', 1)
  271. if not tag:
  272. tag = True
  273. found = []
  274. for context in current_context:
  275. found.extend(
  276. context.findAll(tag,
  277. {'class': lambda attr: attr and klass in attr.split()}
  278. )
  279. )
  280. current_context = found
  281. continue
  282. if token == '*':
  283. # Star selector
  284. found = []
  285. for context in current_context:
  286. found.extend(context.findAll(True))
  287. current_context = found
  288. continue
  289. # Here we should just have a regular tag
  290. if not tag_re.match(token):
  291. return []
  292. found = []
  293. for context in current_context:
  294. found.extend(context.findAll(token))
  295. current_context = found
  296. return current_context
  297. def monkeypatch(BeautifulSoupClass=None):
  298. """
  299. If you don't explicitly state the class to patch, defaults to the most
  300. common import location for BeautifulSoup.
  301. """
  302. if not BeautifulSoupClass:
  303. from BeautifulSoup import BeautifulSoup as BeautifulSoupClass
  304. BeautifulSoupClass.findSelect = select
  305. def unmonkeypatch(BeautifulSoupClass=None):
  306. if not BeautifulSoupClass:
  307. from BeautifulSoup import BeautifulSoup as BeautifulSoupClass
  308. delattr(BeautifulSoupClass, 'findSelect')
  309. #!/usr/bin/python
  310. # This code is original from jsmin by Douglas Crockford, it was translated to
  311. # Python by Baruch Even. The original code had the following copyright and
  312. # license.
  313. #
  314. # /* jsmin.c
  315. # 2007-05-22
  316. #
  317. # Copyright (c) 2002 Douglas Crockford (www.crockford.com)
  318. #
  319. # Permission is hereby granted, free of charge, to any person obtaining a copy of
  320. # this software and associated documentation files (the "Software"), to deal in
  321. # the Software without restriction, including without limitation the rights to
  322. # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  323. # of the Software, and to permit persons to whom the Software is furnished to do
  324. # so, subject to the following conditions:
  325. #
  326. # The above copyright notice and this permission notice shall be included in all
  327. # copies or substantial portions of the Software.
  328. #
  329. # The Software shall be used for Good, not Evil.
  330. #
  331. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  332. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  333. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  334. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  335. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  336. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  337. # SOFTWARE.
  338. # */
  339. from StringIO import StringIO
  340. # def jsmin(js):
  341. # ins = StringIO(js)
  342. # outs = StringIO()
  343. # JavascriptMinify().minify(ins, outs)
  344. # str = outs.getvalue()
  345. # if len(str) > 0 and str[0] == '\n':
  346. # str = str[1:]
  347. # return str
  348. def isAlphanum(c):
  349. """return true if the character is a letter, digit, underscore,
  350. dollar sign, or non-ASCII character.
  351. """
  352. return ((c >= 'a' and c <= 'z') or (c >= '0' and c <= '9') or
  353. (c >= 'A' and c <= 'Z') or c == '_' or c == '$' or c == '\\' or (c is not None and ord(c) > 126));
  354. class UnterminatedComment(Exception):
  355. pass
  356. class UnterminatedStringLiteral(Exception):
  357. pass
  358. class UnterminatedRegularExpression(Exception):
  359. pass
class JavascriptMinify(object):
    """Character-by-character JavaScript minifier (Python port of jsmin).

    ``minify`` is the entry point.  While it runs the instance holds:
      instream / outstream -- the file-like objects read from and written to
      theA, theB           -- the two-character window the state machine
                              inspects; theA is the next character to emit
      theLookahead         -- one character pushed back by ``_peek``, or None
    """

    def _outA(self):
        # Emit the current A character.
        self.outstream.write(self.theA)

    def _outB(self):
        # Emit the current B character.
        self.outstream.write(self.theB)

    def _get(self):
        """return the next character from the input stream. Watch out for
        lookahead. If the character is a control character, translate it to
        a space or linefeed. End of input is reported as '\\000'.
        """
        # Consume a pending lookahead character before reading a new one.
        c = self.theLookahead
        self.theLookahead = None
        if c == None:
            c = self.instream.read(1)
        if c >= ' ' or c == '\n':
            return c
        if c == '': # EOF
            return '\000'
        if c == '\r':
            return '\n'
        # Any other control character becomes a space.
        return ' '

    def _peek(self):
        # Read one character and keep it as lookahead so that the next
        # _get() returns it again.
        self.theLookahead = self._get()
        return self.theLookahead

    def _next(self):
        """get the next character, excluding comments. peek() is used to see
        if an unescaped '/' is followed by a '/' or '*'.

        A // comment is replaced by the character that ends it (a linefeed
        or the end-of-input marker); a /* */ comment is replaced by a single
        space. Raises UnterminatedComment if the input ends inside a
        /* */ comment.
        """
        c = self._get()
        if c == '/' and self.theA != '\\':
            p = self._peek()
            if p == '/':
                # Line comment: skip up to the terminating linefeed / EOF.
                c = self._get()
                while c > '\n':
                    c = self._get()
                return c
            if p == '*':
                # Block comment: consume the '*' held in lookahead, then
                # scan for the closing '*/'.
                c = self._get()
                while 1:
                    c = self._get()
                    if c == '*':
                        if self._peek() == '/':
                            self._get()
                            return ' '
                    if c == '\000':
                        raise UnterminatedComment()
        return c

    def _action(self, action):
        """do something! What you do is determined by the argument:
        1 Output A. Copy B to A. Get the next B.
        2 Copy B to A. Get the next B. (Delete A).
        3 Get the next B. (Delete B).
        action treats a string as a single character. Wow!
        action recognizes a regular expression if it is preceded by one of
        ( , = : [ ? ! & | ; { } or a linefeed.

        The three steps fall through: action 1 performs all of them,
        action 2 the last two, action 3 only the last.
        Raises UnterminatedStringLiteral / UnterminatedRegularExpression when
        a literal reaches a linefeed or the end of input before it closes.
        """
        if action <= 1:
            self._outA()
        if action <= 2:
            self.theA = self.theB
            if self.theA == "'" or self.theA == '"':
                # Copy a whole string literal verbatim; theB still holds the
                # opening quote and marks where the literal ends.
                while 1:
                    self._outA()
                    self.theA = self._get()
                    if self.theA == self.theB:
                        break
                    if self.theA <= '\n':
                        raise UnterminatedStringLiteral()
                    if self.theA == '\\':
                        # Emit the backslash and take the escaped character
                        # without interpreting it.
                        self._outA()
                        self.theA = self._get()
        if action <= 3:
            self.theB = self._next()
            if self.theB == '/' and (self.theA == '(' or self.theA == ',' or
                                     self.theA == '=' or self.theA == ':' or
                                     self.theA == '[' or self.theA == '?' or
                                     self.theA == '!' or self.theA == '&' or
                                     self.theA == '|' or self.theA == ';' or
                                     self.theA == '{' or self.theA == '}' or
                                     self.theA == '\n'):
                # A '/' in this position starts a regular expression literal;
                # copy it verbatim up to the closing '/'.
                self._outA()
                self._outB()
                while 1:
                    self.theA = self._get()
                    if self.theA == '/':
                        break
                    elif self.theA == '\\':
                        self._outA()
                        self.theA = self._get()
                    elif self.theA <= '\n':
                        raise UnterminatedRegularExpression()
                    self._outA()
                self.theB = self._next()

    def _jsmin(self):
        """Copy the input to the output, deleting the characters which are
        insignificant to JavaScript. Comments will be removed. Tabs will be
        replaced with spaces. Carriage returns will be replaced with linefeeds.
        Most spaces and linefeeds will be removed.
        """
        self.theA = '\n'
        self._action(3)
        # Run until A becomes the end-of-input marker.
        while self.theA != '\000':
            if self.theA == ' ':
                # Keep a space only when an identifier character follows.
                if isAlphanum(self.theB):
                    self._action(1)
                else:
                    self._action(2)
            elif self.theA == '\n':
                # Keep a linefeed before characters that may begin a
                # statement or before an identifier character.
                if self.theB in ['{', '[', '(', '+', '-']:
                    self._action(1)
                elif self.theB == ' ':
                    self._action(3)
                else:
                    if isAlphanum(self.theB):
                        self._action(1)
                    else:
                        self._action(2)
            else:
                if self.theB == ' ':
                    # Keep a following space only after an identifier
                    # character.
                    if isAlphanum(self.theA):
                        self._action(1)
                    else:
                        self._action(3)
                elif self.theB == '\n':
                    # Keep a following linefeed after characters that may
                    # end a statement or after an identifier character.
                    if self.theA in ['}', ']', ')', '+', '-', '"', '\'']:
                        self._action(1)
                    else:
                        if isAlphanum(self.theA):
                            self._action(1)
                        else:
                            self._action(3)
                else:
                    self._action(1)

    def minify(self, instream, outstream):
        """Minify the JavaScript read from ``instream`` into ``outstream``.

        Both arguments are file-like objects: ``instream`` is read one
        character at a time with ``read(1)`` and is closed when done;
        ``outstream`` only has ``write`` called on it and is left open.
        """
        self.instream = instream
        self.outstream = outstream
        self.theA = '\n'
        self.theB = None
        self.theLookahead = None
        self._jsmin()
        self.instream.close()
  500. if __name__ == "__main__":
  501. main()