PageRenderTime 59ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/squeezeit/slimmer.py

http://github.com/samarudge/Squeezeit
Python | 676 lines | 608 code | 34 blank | 34 comment | 31 complexity | c301f3396f327160549aad1b2a2514dc MD5 | raw file
  1. #!/usr/bin/python
  2. """
  3. slimmer.py
  4. Peter Bengtsson, mail@peterbe.com, 2004-2006
  5. slimmer.py is a simple set of functions for compressing/optimizing
  6. HTML, XHTML and CSS documents as strings.
  7. Ideally used from other modules, something like this::
  8. >>> import slimmer
  9. >>> code = open('file.html').read()
  10. >>> slimmed = slimmer.xhtml_slimmer(code)
  11. >>> print len(code), len(slimmed)
  12. You have to estimate yourself if you think it's worth using slimmer
  13. on your documents if you're running a dynamic setting such as a
  14. web application (e.g. Zope with CheckoutableTemplates).
  15. On my PC I slimmed a 1MB .html document in 2.2 seconds and saved
  16. 100KB. Saved 31KB on a 110KB .css file in 0.063 seconds.
  17. And lastly, saved 17% in size in 0.016 seconds for www.python.org.
  18. Changes::
  19. 0.1.30 Nov 2009 Better at guessing HTML or XHTML
  20. 0.1.29 Nov 2008 New distutils release
  21. 0.1.28 Nov 2008 Added some tests that tests UTF-8 and EUC-JP HTML
  22. 0.1.27 Nov 2008 As a new distutils package
  23. 0.1.26 Oct 2007 Minor improvement to js_slimmer for 'var x = [...]'
  24. 0.1.25 Oct 2007 Slimming unicode text with hex colours like #FFFFFF
  25. caused an error relating to string.translate()
  26. 0.1.24 Sep 2007 <!--#include ... not removed in HTML slimmer
  27. 0.1.23 Apr 2007 Speedtest checks possibility of gzip
  28. 0.1.22 Jul 2006 Added function guessSyntax(code)
  29. 0.1.21 May 2006 Serious bug fix in _js_slimmer() with code like:
  30. '''for (var e in somearray)'''
  31. the result could be
  32. '''for (vareinsomearray)'''
  33. 0.1.20 Feb 2006 Incorporated new experimental --hardcore option
  34. 0.1.19 Feb 2006 Fixed bug in how js_slimmer() removes // comments
  35. 0.1.18 Jan 2006 Improved js_slimmer() floppy whitespace in parameter lists
  36. 0.1.17 Aug 2005 Fix in css_slimmer() for voice-family: hack (thanks Jens)
  37. 0.1.16 Jun 2005 Improved js_slimmer() for sloppy function definitions
  38. 0.1.15 Jun 2005 Improved js_slimmer() for sloppy if|else|else if statements
  39. 0.1.14 Apr 2005 Added unit test of Holly-hack for CSS
  40. 0.1.13 Apr 2005 Improved js_slimmer() to make 'y = 123;y = document;' to instead
  41. become 'y=123;y=document;'
  42. 0.1.12 Mar 2005 Fixed css_slimmer() to put a linebreak before //-->
  43. 0.1.11 Feb 2005 Fixed js_slimmer() for some curly bracket endings
  44. 0.1.10 Jan 2005 (Major patch by Baruch Even)
  45. - Fixed the -t option for testing, it didn't work, --test did work.
  46. - Fixed a typo s/whatspace/whitespace/
  47. - Fixed a bug where more than one consecutive space turned into nothing,
  48. added test 6 for this.
  49. - Revamped other code to completely eliminate end of lines. It works in
  50. FireFox 1.0
  51. - Changed the test cases to fit
  52. - Removed the last ; before } -> s/;}/}/
  53. - Changed the test cases to fit
  54. 0.1.9 Jan 2005 CLI interface can accept URLs
  55. 0.1.8 Dec 2004 Added an option (UNQUOTE_HTML_ATTRIBUTES) to remove
  56. quotes from HTML attributes. (default is off)
  57. 0.1.7 Dec 2004 Separate out from CheckoutableTemplates and __all__
  58. variable fixed for js_slimmer.
  59. 0.1.6 Dec 2004 Care for MacIE5 CSS Hack (http://www.sam-i-am.com/work/sandbox/css/mac_ie5_hack.html)
  60. 0.1.5 Nov 2004 Some improvements to js_slimmer()
  61. 0.1.4 Nov 2004 Added first draft of js_slimmer()
  62. 0.1.3 Nov 2004 Much improved CLI functions
  63. 0.1.2 Sep 2004 Added basic CLI functions (see run())
  64. 0.1.1 Sep 2004 Major speed improvement by removing
  65. the unquote_numerical feature.
  66. 0.1.0 Sep 2004 First version numbering
  67. """
  68. __version__='0.1.30'
  69. __all__=['acceptableSyntax','guessSyntax','slimmer','css_slimmer',
  70. 'html_slimmer','xhtml_slimmer','js_slimmer',
  71. '__version__']
  72. import re, os, sys, getopt
  73. import urllib2
  74. try:
  75. from js_function_slimmer import slim as js_function_slimmer
  76. except ImportError:
  77. js_function_slimmer = None
  78. ## Options
  79. #
  80. # If you're slimming HTML docs and really want to
  81. # convert border="0" to border=0, be aware that this
  82. # can take 5 times longer than without but compresses
  83. # the document at least twice as good.
  84. UNQUOTE_HTML_ATTRIBUTES = 0
  85. # Define the syntax options we accept
  86. HTML = 'html'
  87. XHTML = 'xhtml'
  88. CSS = 'css'
  89. JS = 'js'
  90. OK_SYNTAX = (HTML, XHTML, CSS, JS)
  91. def acceptableSyntax(syntax):
  92. """ return the syntax as we recognize it or None """
  93. syntax = str(syntax).lower().strip().replace(' ','').replace('-','')
  94. syntax = syntax.replace('stylesheet','css') # allow for alias
  95. syntax = syntax.replace('javascript','js') # allow for alias
  96. if syntax in OK_SYNTAX:
  97. return syntax
  98. else:
  99. return None
  100. some_javascript_code_regex = re.compile(
  101. 'function\(\)\s*{|var \w|return false;|return true;|'\
  102. 'function \w{2,15}\(|}\s*else if\s*\(')
  103. some_css_code_regex = re.compile('^#\w+\s*{|body\s*{|font-family:|margin:0|display:'\
  104. '|height:\s*\d|border:1px')
  105. _html_doctypes = ('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01',)
  106. _xhtml_doctypes = ('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional',
  107. '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict')
  108. some_xhtml_code_regex = re.compile('|'.join([re.escape(x) for x in _xhtml_doctypes])+\
  109. '|<html>|<title>|\s/>|<input|<a\s+', re.I)
  110. some_html_code_regex = re.compile('|'.join([re.escape(x) for x in _html_doctypes])+\
  111. '|<table|background=\"|<script|border=0|<!--', re.I)
  112. def guessSyntax(code):
  113. code = code.strip()
  114. if some_html_code_regex.findall(code):
  115. return HTML
  116. elif some_xhtml_code_regex.findall(code):
  117. return XHTML
  118. elif some_javascript_code_regex.findall(code):
  119. return JS
  120. elif some_css_code_regex.findall(code):
  121. return CSS
  122. else:
  123. # getting desperate but we shall prevail!
  124. if '</' in code:
  125. if '/>' in code or '/ >' in code:
  126. return XHTML
  127. return HTML
  128. return None
  129. def slimmer(code, syntax=XHTML, hardcore=False):
  130. """ wrap all function we have """
  131. if syntax == XHTML:
  132. return _xhtml_slimmer(code)
  133. elif syntax == HTML:
  134. return _html_slimmer(code)
  135. elif syntax == CSS:
  136. return _css_slimmer(code)
  137. elif syntax == JS:
  138. return _js_slimmer(code, slim_functions=bool(hardcore))
  139. try:
  140. import itertools
  141. def anyTrue(pred, seq):
  142. return True in itertools.imap(pred,seq)
  143. except ImportError:
  144. def anyTrue(pred, seq):
  145. for e in seq:
  146. if pred(e):
  147. return True
  148. return False
  149. # CSS
  150. css_comments = re.compile(r'/\*.*?\*/', re.MULTILINE|re.DOTALL)
  151. hex_colour = re.compile(r'#\w{2}\w{2}\w{2}')
  152. def _css_slimmer(css):
  153. """ remove repeating whitespace ( \t\n) """
  154. #css = css_comments.sub('', css) # remove comments
  155. remove_next_comment = 1
  156. for css_comment in css_comments.findall(css):
  157. if css_comment[-3:]=='\*/':
  158. remove_next_comment=0
  159. continue
  160. if remove_next_comment:
  161. css = css.replace(css_comment, '')
  162. else:
  163. remove_next_comment = 1
  164. css = re.sub(r'\s\s+', ' ', css) # >= 2 whitespace becomes one whitespace
  165. css = re.sub(r'\s+\n', '', css) # no whitespace before end of line
  166. # Remove space before and after certain chars
  167. for char in ('{', '}', ':', ';', ','):
  168. css = re.sub(char+r'\s', char, css)
  169. css = re.sub(r'\s'+char, char, css)
  170. css = re.sub(r'\s+</',r'</', css) # no extraspace before </style>
  171. css = re.sub(r'}\s(#|\w)', r'}\1', css)
  172. css = re.sub(r';}', r'}', css) # no need for the ; before end of attributes
  173. css = re.sub(r'}//-->', r'}\n//-->', css)
  174. css = simplifyHexColours(css)
  175. # voice-family hack. The declation: '''voice-family: "\"}\""''' requires
  176. # that extra space between the ':' and the first '"' which _css_slimmer()
  177. # removed. Put it back (http://real.issuetrackerproduct.com/0168)
  178. css = re.sub(r'voice-family:"\\"}\\""', r'voice-family: "\\"}\\""', css)
  179. return css.strip()
  180. # HTML
  181. f_IMD = re.I|re.MULTILINE|re.DOTALL
  182. f_MD = re.MULTILINE|re.DOTALL
  183. f_M = re.MULTILINE
  184. # the comment has to start with a space or a charater
  185. # otherwise me might remove a SSI include which can look like this:
  186. # <!--#include virtual="/include/myinclude.asp"-->
  187. html_comments_oneline = re.compile(r'<!--[\w\s].*?-->', re.I)
  188. html_inline_css = re.compile(r'<style.*?>.*?</style>', f_IMD)
  189. html_inline_js = re.compile(r'<script.*?>.*?</script>', f_IMD)
  190. any_tag = re.compile(r"<\w.*?>", f_IMD)
  191. excess_whitespace = re.compile(r' \s+|\s +', f_M)
  192. excess_whitespace1 = re.compile(r'\w\s+\w', f_M)
  193. excess_whitespace2 = re.compile(r'"\s+>', f_M)
  194. excess_whitespace3 = re.compile(r"'\s+>", f_M)
  195. excess_whitespace4 = re.compile('"\s\s+\w+="|\'\s\s+\w+=\'|"\s\s+\w+=|\'\s\s+\w+=', f_M)
  196. excess_whitespace6 = re.compile(r"\d\s+>", f_M)
  197. quotes_in_tag = re.compile('([a-zA-Z]+)="([a-zA-Z0-9-_\.]+)"')
  198. def _html_slimmer(html, xml=0):
  199. """ Optimize like XHTML but go one step further """
  200. # 1. optimize inline CSS
  201. for styletag in html_inline_css.findall(html):
  202. html = html.replace(styletag, css_slimmer(styletag))
  203. # 2. optimize inline Javascript
  204. for scripttag in html_inline_js.findall(html):
  205. html = html.replace(scripttag, js_slimmer(scripttag))
  206. # 2. Remove excessive whitespace between tags
  207. html = re.sub(r'>\s+<','><', html)
  208. # 3. Remove oneline comments
  209. html = html_comments_oneline.sub('', html)
  210. # 4. In every tag, remove quotes on numerical attributes and all
  211. # excessive whitespace
  212. ew1 = excess_whitespace1 # shortcut
  213. ew6 = excess_whitespace6 # shortcut
  214. ew4 = excess_whitespace4 # shortcut
  215. for tag in uniqify(any_tag.findall(html)):
  216. # 4a. observe exceptions
  217. if tag.startswith('<!') or tag.find('</')>-1:
  218. continue
  219. original = tag
  220. # 4b. remove excess whitespace inside the tag
  221. tag= excess_whitespace2.sub('">', tag)
  222. tag= excess_whitespace3.sub("'>", tag)
  223. for each in ew1.findall(tag)+ew6.findall(tag):
  224. tag = tag.replace(each, excess_whitespace.sub(' ',each))
  225. for each in ew4.findall(tag):
  226. tag = tag.replace(each, each[0]+' '+each[1:].lstrip())
  227. # 4c. remove quotes
  228. if not xml and UNQUOTE_HTML_ATTRIBUTES:
  229. tag= quotes_in_tag.sub(r'\1=\2', tag)
  230. # has the tag been improved?
  231. if original != tag:
  232. html = html.replace(original, tag)
  233. return html.strip()
  234. def _xhtml_slimmer(xhtml):
  235. # currently not difference
  236. return _html_slimmer(xhtml, xml=1)
  237. excess_whitespace_js = re.compile('^\s+(\S)',re.MULTILINE)
  238. excess_whitespace_js2 = re.compile('(\S+);\s+(\S+)', re.MULTILINE)
  239. whitespaced_func_def = re.compile('(function)\s+(\S+\(.*?\))\s*{\s*(\S+)', f_IMD)
  240. whitespaced_func_def2 = re.compile('function\s*\(\)\s*{\s*(\S+)', f_IMD)
  241. js_comments_singlelines = re.compile('^//.*?$|\s+//.*?$', re.DOTALL|re.MULTILINE|re.I)
  242. js_comments_singlelines2 = re.compile('((^|;|\s)//.*?$)', re.DOTALL|re.MULTILINE|re.I)
  243. js_comment_end = re.compile('-->')
  244. js_comment_start = re.compile('(<!--(.*?))$\s(\w+)', re.MULTILINE)
  245. #js_comment_start2 = re.compile('(\<\!--(.*?)(\n+|[\r\n]+)\s*(\w+))', re.DOTALL|re.MULTILINE)
  246. whitespaced_controls = re.compile('(for|else if|if|catch|while)\s*\((.*?)\)\s*{\s*(\S+)', f_IMD)
  247. single_whitespaced_controls = re.compile('(try|else)\s*{\s*(\S+)', f_IMD)
  248. sloppy_conditionals = re.compile('\(\s*(\S+)\s*(==|!=)\s*(\S+)\)')
  249. sloppy_parameters = re.compile('\(([(\w+)\s,]+)\)')
  250. sloppy_ifs = re.compile('\s*(if|else if|else)\s*({|\()')
  251. sloppy_declarations = re.compile('var\s+(\w+)\s*=\s*(\d+|\w+|\"[\w+ ]\"|\[[\'\w \.,\"]+\])')
  252. sloppy_simple_declarations = re.compile('(\w+)\s*=\s*(\d+|\w+|\"[\w+ ]\")')
  253. sloppy_increments = re.compile('(\w+)\s*(\+=|-=)\s*(\d*|\"\w+\")')
  254. js_multiline_comments = re.compile(r'/\*.*?\*/', re.MULTILINE|re.DOTALL)
  255. closing_curly_brackets = re.compile(r'\s*}', re.MULTILINE)
  256. opening_curly_brackets = re.compile(r'{\s*', re.MULTILINE)
  257. def _js_slimmer(js, slim_functions=False):
  258. # 1. remove all whitespace starting every line
  259. js = excess_whitespace_js.sub(r'\1',js)
  260. # 2. Remove all /* multiline comments */
  261. js = js_multiline_comments.sub('',js)
  262. # 3. // style comments
  263. def _reject_slashslash_comment(match):
  264. if match.group().find('-->')==-1:
  265. return ''
  266. else:
  267. return match.group()
  268. js = js_comments_singlelines.sub(_reject_slashslash_comment, js)
  269. _="""
  270. for comment, start in js_comments_singlelines2.findall(js):
  271. # ...except those that contain -->
  272. replacewith = ''
  273. if start == ';':
  274. replacewith = ';'
  275. if not js_comment_end.findall(comment):
  276. js = js.replace(comment, replacewith)
  277. """
  278. js = js_comment_start.sub(r'<!--\n\3', js)
  279. # 3. excessive whitespace after semicolons
  280. js = excess_whitespace_js2.sub(r'\1;\2', js)
  281. # 4. functions defined with lots of whitespace
  282. js = whitespaced_func_def.sub(r'\1 \2{\3', js)
  283. js = whitespaced_func_def2.sub(r'function(){\1', js)
  284. # 5. control statements with lots of whitespace
  285. js = whitespaced_controls.sub(r'\1(\2){\3', js)
  286. # 6. control statements without params with lots of whitespace
  287. js = single_whitespaced_controls.sub(r'\1{\2', js)
  288. # 7. convert '(page == "foo")' to '(page=="foo")'
  289. js = sloppy_conditionals.sub(r'(\1\2\3)', js)
  290. # 8. convert '} else if {' to '}else if{'
  291. js = sloppy_ifs.sub(r'\1\2', js)
  292. # 9. convert 'var x = foo' to 'var x=foo'
  293. js = sloppy_declarations.sub(r'var \1=\2',js)
  294. js = sloppy_simple_declarations.sub(r'\1=\2', js)
  295. # 10. whitespace around closing } curly brackets
  296. js = opening_curly_brackets.sub('{', js)
  297. js = closing_curly_brackets.sub('}', js)
  298. # 11. Neater parameter lists
  299. #js = sloppy_parameters.sub(lambda m:m.group().replace(' ',''), js)
  300. def param_list_fixer(m):
  301. whole = m.group()
  302. params = m.groups()[0]
  303. return whole.replace(params,
  304. ','.join([x.strip() for x in params.split(',')]))
  305. js = sloppy_parameters.sub(param_list_fixer, js)
  306. # 12. sloppy increments
  307. js = sloppy_increments.sub(r'\1\2\3', js)
  308. if slim_functions and js_function_slimmer:
  309. js = js_function_slimmer(js)
  310. return js.strip()
  311. ## ----- Some fancier names
  312. ##
  313. def css_slimmer(css, hardcore=False):
  314. return _css_slimmer(css)
  315. def xhtml_slimmer(xhtml, hardcore=False):
  316. return _xhtml_slimmer(xhtml)
  317. def html_slimmer(html, hardcore=False):
  318. return _html_slimmer(html)
  319. def js_slimmer(js, hardcore=False):
  320. return _js_slimmer(js, slim_functions=bool(hardcore))
  321. ## ----- Methods related to simplifying HEX colour codes
  322. def uniqify(all):
  323. """ borrowed from Tim Peters' algorithm on ASPN Cookbook """
  324. # REMEMBER! This will shuffle the order of the list
  325. u = {}
  326. for each in all:
  327. u[each]=1
  328. return u.keys()
  329. def simplifyHexColours(text):
  330. """ Replace all colour declarations where pairs repeat.
  331. I.e. #FFFFFF => #FFF; #CCEEFF => #CEF
  332. and #EFEFEF, #EFCDI9 avoided """
  333. colour_replacements = {}
  334. all_hex_encodings = hex_colour.findall(text)
  335. for e in uniqify(all_hex_encodings):
  336. if e[1]==e[2] and e[3]==e[4] and e[5]==e[6]:
  337. colour_replacements[e] = '#'+e[1]+e[3]+e[5]
  338. for k, v in colour_replacements.items():
  339. text = text.replace(k, v)
  340. return text
  341. def __grr():
  342. print "Usage: python slimmer.py /path/to/input.html [xhtml|html|css|js] /path/to/output.html"
  343. def _pingable(url):
  344. try:
  345. urllib2.urlopen(url)
  346. return 1
  347. except:
  348. return 0
  349. def _is_openable_url(path_or_url):
  350. # looks like a URL?
  351. if path_or_url.lower().startswith('http'):
  352. return _pingable(path_or_url)
  353. else:
  354. return 0
  355. def __guess_syntax(filepath):
  356. lines = []
  357. if os.path.isfile(filepath) or _is_openable_url(filepath):
  358. if filepath.lower().endswith('.css'):
  359. return 'css'
  360. elif filepath.lower().endswith('.js'):
  361. return 'js'
  362. if os.path.isfile(filepath):
  363. f=open(filepath)
  364. else:
  365. f=urllib2.urlopen(filepath)
  366. line = f.readline()
  367. c = 0
  368. while len(lines) < 50 and line is not None:
  369. if line.strip():
  370. lines.append(line)
  371. line = f.readline()
  372. c += 1
  373. if c>100:
  374. break # paranoid safety
  375. f.close()
  376. lines_list = lines
  377. lines = '\n'.join([x for x in lines_list if x.find('!DOCTYPE')>-1])
  378. if lines.find('HTML 4.0')>-1:
  379. return 'html'
  380. elif lines.find('XHTML 1.0')>-1:
  381. return 'xhtml'
  382. elif lines.find('<html>') > -1:
  383. return 'html'
  384. else:
  385. lines = '\n'.join(lines_list)
  386. if lines.lower().find('<html') > -1:
  387. return 'html'
  388. if filepath.lower().endswith('.html') or \
  389. filepath.lower().endswith('.htm'):
  390. return 'html'
  391. return None
# Help text printed by __usage(), i.e. for -h/--help and when no
# arguments are given on the command line.
usage="""slimmer.py Compress web files on the command line
Peter Bengtsson, <mail@peterbe.com>, Nov 2004
USAGE: python slimmer.py [OPTIONS] /path/to/input.html [xhtml|html|css|js]
Options:
-t, --test Perform a speed and compression test
--output Save result to file
--version Prints version and exits
--hardcore Tries really hard but potentially slower
-h, --help Prints this message
If you don't specify the content type after the input filename,
the program will try to guess it by opening the file and looking
at the file extension.
Examples:
$ python slimmer.py index.html XHTML --output=index.optimized.html
$ python slimmer.py --test screen.css
"""
  408. def __showversion():
  409. print __version__
  410. def __usage():
  411. print usage
  412. class Usage(Exception):
  413. def __init__(self, msg):
  414. self.msg = msg
  415. def main(argv=None):
  416. if argv is None:
  417. argv = sys.argv
  418. try:
  419. try:
  420. opts, args = getopt.getopt(argv[1:], "ho:vt",
  421. ["help", "output=", "version", "test", "hardcore"])
  422. except getopt.error, msg:
  423. raise Usage(msg)
  424. # more code, unchanged
  425. except Usage, err:
  426. print >>sys.stderr, err.msg
  427. print >>sys.stderr, "for help use --help"
  428. return 2
  429. outputfile = None
  430. speedtest = 0
  431. hardcore = False
  432. for o, a in opts:
  433. if o == "--version":
  434. __showversion()
  435. return 2
  436. elif o in ('-h', '--help'):
  437. __usage()
  438. return 3
  439. elif o in ('-o', '--output'):
  440. outputfile = a
  441. elif o in ("-t", "--test"):
  442. speedtest = 1
  443. elif o == '--hardcore':
  444. hardcore = True
  445. if not args:
  446. __usage()
  447. return 4
  448. syntax = None
  449. inputfile = None
  450. otherargs = []
  451. for arg in args:
  452. if arg in ('-t', '--test'):
  453. speedtest = 1
  454. elif arg.startswith('--output='):
  455. outputfile = arg[9:]
  456. elif acceptableSyntax(arg):
  457. syntax = acceptableSyntax(arg)
  458. elif os.path.isfile(arg) or _is_openable_url(arg):
  459. inputfile = arg
  460. else:
  461. otherargs.append(arg)
  462. if inputfile and syntax is None:
  463. syntax = __guess_syntax(inputfile)
  464. if inputfile is None:
  465. print >>sys.stderr, "No input file"
  466. print >>sys.stderr, "for help use --help"
  467. return 2
  468. if not acceptableSyntax(syntax):
  469. print >>sys.stderr, "Unrecognized syntax"
  470. print >>sys.stderr, "for help use --help"
  471. return 2
  472. if otherargs:
  473. print >>sys.stderr, "Unrecognized arguments %r"%otherargs
  474. print >>sys.stderr, "for help use --help"
  475. return 2
  476. run(inputfile, syntax, speedtest, outputfile, hardcore=hardcore)
  477. return 0
  478. from time import time
  479. def _gzipText(content):
  480. import cStringIO,gzip
  481. zbuf = cStringIO.StringIO()
  482. zfile = gzip.GzipFile(None, 'wb', 9, zbuf)
  483. zfile.write(content)
  484. zfile.close()
  485. return zbuf.getvalue()
  486. def run(inputfile, syntax, speedtest, outputfile, hardcore=False):
  487. if os.path.isfile(inputfile):
  488. contents = open(inputfile).read()
  489. else:
  490. contents = urllib2.urlopen(inputfile).read()
  491. t0=time()
  492. slimmed = slimmer(contents, syntax, hardcore=hardcore)
  493. t=time()-t0
  494. if speedtest:
  495. before = len(contents)
  496. after = len(slimmed)
  497. after_zlibbed = len(slimmed.encode('zlib'))
  498. after_gzip = len(_gzipText(slimmed))
  499. size_before = before
  500. if size_before > 100000:
  501. size_before = "%s (%sK)"%(size_before, size_before/1024)
  502. size_after = after
  503. if size_after > 100000:
  504. size_after = "%s (%sK)"%(size_after, size_after/1024)
  505. size_difference = before-after
  506. if size_difference > 10000:
  507. size_difference = "%s (%sK)"%(size_difference, size_difference/1024)
  508. print "Took %s seconds"%round(t, 3)
  509. print "Bytes before: %s"%size_before
  510. print "Bytes after: %s"%size_after
  511. print "Bytes after zlib: %s"%after_zlibbed
  512. print "Bytes after gzip: %s"%after_gzip
  513. print "Bytes saved: %s "%size_difference,
  514. print "(%s%% of original size)"%(100*round(after/float(before), 2))
  515. elif outputfile:
  516. open(outputfile, 'w').write(slimmed)
  517. else:
  518. print >>sys.stdout, slimmed
# Run the command line interface when executed as a script; the value
# returned by main() becomes the process exit status.
if __name__=='__main__':
    sys.exit(main())