
/digsby/src/util/net.py

https://github.com/xiaohangyu/digsby
from __future__ import with_statement
import sys, traceback, re, struct, logging, random, StringIO
import calendar, time, rfc822
import socks, socket, asynchat
import urllib, urllib2, urlparse
import httplib, httplib2
import cookielib
import simplejson
import itertools

from httplib import HTTPConnection
from httplib import NotConnected

import primitives.funcs
import proxy_settings
from Events import EventMixin
from callbacks import callsback

try:
    sentinel
except NameError:
    class Sentinel(object):
        def __repr__(self):
            return "<Sentinel (%r backup) %#x>" % (__file__, id(self))
    sentinel = Sentinel()

log = logging.getLogger('util.net')

default_chunksize = 1024 * 4


def get_ips_s(hostname = ''):
    '''
    Returns the IP addresses of the given hostname; the default of '' means
    this machine.

    @param hostname: hostname to get the IPs of
    @return: IPs as a list of dotted-quad strings
    '''
    # gethostbyname_ex returns a tuple: (hostname, aliaslist, ipaddr_list)
    return socket.gethostbyname_ex(hostname or socket.gethostname())[2]


def get_ips(hostname = ''):
    '''
    Returns the IP addresses of the given hostname; the default of '' means
    this machine.

    @param hostname: hostname to get the IPs of
    @return: IPs as a list of packed 4-byte strings (the inet_aton format)
    '''
    return [socket.inet_aton(ip) for ip in get_ips_s(hostname)]

myips = get_ips


def myip():
    'Returns the IP of this machine to the outside world.'
    return myips()[0]


def ip_from_bytes(bytes):
    """
    Converts a packed 4-byte IP address into a dotted XX.XX.XX.XX quad string.

    thanks U{http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66517}
    """
    return socket.inet_ntoa(bytes)
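
# A minimal usage sketch of the IP helpers above (not part of the original
# module); _demo_ip_helpers is hypothetical and assumes 'localhost' resolves
# to 127.0.0.1 on this machine.
def _demo_ip_helpers():
    packed = socket.inet_aton('127.0.0.1')   # same packed form get_ips() returns
    assert ip_from_bytes(packed) == '127.0.0.1'
    assert '127.0.0.1' in get_ips_s('localhost')
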
class FileChunker(object):
    'asynchat producer to return chunks of a file'

    def __init__(self, fileobj, chunksize = default_chunksize, close_when_done = False,
                 progress_cb = lambda bytes: None, bytecounter = None):
        self.fileobj = fileobj
        self.chunksize = chunksize
        self.close_when_done = close_when_done
        if bytecounter is None:
            bytecounter = fileobj.tell
        self.total = bytecounter()
        self.progress_cb = progress_cb
        self.cancelled = False

    def more(self):
        try:
            data_read = self.fileobj.read(self.chunksize)
        except ValueError:
            try:
                self.fileobj.close() # make sure it's good and dead
            except:
                pass
            return '' # same as what happens at end of file

        sz = len(data_read)
        if sz == 0 and self.close_when_done:
            self.fileobj.close()
        self.total += sz
        self.progress_cb(self.total)
        return data_read

    @classmethod
    def tofile(cls, sourcefile, outfile, progress_callback = lambda *a: None,
               bytecounter = None):
        gen = cls.tofile_gen(sourcefile, outfile, progress_callback, bytecounter)
        gen.next()
        try:
            gen.next()
        except StopIteration:
            pass

    @classmethod
    def tofile_gen(cls, sourcefile, outfile, progress_callback = lambda *a: None,
                   bytecounter = None):
        fc = cls(sourcefile, close_when_done = True, bytecounter = bytecounter)
        yield fc

        chunk = fc.more()
        bytes_written = 0

        # localize for speed
        write, tell, more = outfile.write, outfile.tell, fc.more

        while chunk and not fc.cancelled:
            write(chunk)
            bytes_written += len(chunk)
            progress_callback(tell())
            chunk = more()

        outfile.close()


class NoneFileChunker(FileChunker):
    def more(self):
        return super(NoneFileChunker, self).more() or None
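
# Illustrative sketch of FileChunker.tofile with in-memory file objects
# (not from the original module). StringIO stands in for real files;
# _KeepOpen is a hypothetical helper because tofile() closes its output.
def _demo_filechunker():
    src = StringIO.StringIO('x' * 10000)

    class _KeepOpen(StringIO.StringIO):
        def close(self):
            pass    # keep the buffer readable after tofile() finishes

    dst = _KeepOpen()
    progress = []
    FileChunker.tofile(src, dst, progress_callback = progress.append)

    assert dst.getvalue() == 'x' * 10000
    assert progress[-1] == 10000   # progress reports total bytes written
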
def httpjoin(base, path, keepquery = False):
    if path.startswith('http'):
        # path is already absolute
        return path
    else:
        joined = urlparse.urljoin(base, path)
        if not keepquery:
            parsed = list(urlparse.urlparse(joined))
            parsed[4] = ''
            return urlparse.urlunparse(parsed)
        else:
            return joined
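
# Quick sanity sketch of httpjoin (not from the original module); the
# example.com URLs are illustrative.
def _demo_httpjoin():
    assert httpjoin('http://example.com/a/', 'b.html') == 'http://example.com/a/b.html'
    assert httpjoin('http://example.com/a/', 'http://other.com/c') == 'http://other.com/c'
    # query strings are dropped unless keepquery=True
    assert httpjoin('http://example.com/', 'page?x=1') == 'http://example.com/page'
    assert httpjoin('http://example.com/', 'page?x=1', keepquery = True) == 'http://example.com/page?x=1'
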
class UrlQuery(str):
    @classmethod
    def form_encoder(cls):
        return WebFormData

    @classmethod
    def parse(cls, url, parse_query = True, utf8 = False):
        '''
        Returns a mapping for a URL string, with the following keys:

            scheme://netloc/path;params?query#fragment

        If parse_query is True, a query string like "key=value&done" becomes
        {'key': 'value', 'done': True}; otherwise query is left as a string.
        '''
        scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)

        if parse_query:
            query = cls.form_encoder().parse(query, utf8 = utf8)

        return dict(scheme = scheme, netloc = netloc, path = path,
                    params = params, query = query, fragment = fragment)

    @classmethod
    def unparse(cls, scheme = '', netloc = '', path = '', params = '', query = None, fragment = ''):
        if query is None:
            query = {}
        if isinstance(query, dict):
            query = cls.form_encoder()(query)
        return urlparse.urlunparse((scheme, netloc, path, params, query, fragment))

    def __new__(cls, link, d = {}, **kwargs):
        '''
        Splats **kwargs (and d) as URL parameters onto the given URL.

        The URL may already contain parameters, and may end with a question
        mark; the right joiner ('?' or '&') is chosen accordingly.
        '''
        if not (d or kwargs):
            return str.__new__(cls, link)

        if link.endswith('?'):
            link = link[:-1]

        if '?' in link:
            joiner = '&' if (d or kwargs) else ''
        else:
            joiner = '?'

        return str.__new__(cls, ''.join([link, joiner, cls.form_encoder()(d = d, **kwargs)]))
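
# Illustrative sketch of UrlQuery (not from the original module); the URLs
# are made up.
def _demo_urlquery():
    assert UrlQuery('http://example.com/api', foo = '1') == 'http://example.com/api?foo=1'
    assert UrlQuery('http://example.com/api?foo=1', bar = '2') == 'http://example.com/api?foo=1&bar=2'
    # with parse_query=False the query is left as a raw string; full decoding
    # relies on Digsby's custom 'url' codec
    assert UrlQuery.parse('http://example.com/api?a=1', parse_query = False)['query'] == 'a=1'
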
class WebFormData(str):
    @classmethod
    def encoder(cls):
        return urllib.urlencode

    def __new__(cls, d = {}, **kwargs):
        if d and kwargs:
            kwargs.update(d)
        else:
            kwargs = kwargs or d
        base = cls.encoder()(kwargs)
        return str.__new__(cls, base)

    @classmethod
    def parse(cls, s, utf8 = False):
        querymap = {}
        if not s:
            return querymap

        # 'url' and 'utf8url' are custom codecs registered elsewhere in Digsby.
        encoding = 'utf8url' if utf8 else 'url'

        for elem in s.split('&'):
            if '=' in elem:
                key, value = elem.split('=', 1)
                querymap[key] = value.decode(encoding)
            else:
                querymap[elem] = True

        return querymap


class WebFormObjectData(WebFormData):
    @classmethod
    def encoder(cls):
        return urlencode_object


class UrlQueryObject(UrlQuery):
    @classmethod
    def form_encoder(cls):
        return WebFormObjectData


def urlencode_object(query):
    return urllib.urlencode(param(query))


def param(a):
    s = list()

    def add(key, value):
        s.append((key, value))

    for prefix in a:
        buildParams(prefix, a[prefix], add)

    return s


def buildParams(prefix, obj, add):
    if isinstance(obj, dict) and obj:
        for k, v in obj.items():
            buildParams(prefix + "[" + k + "]", v, add)
    elif isinstance(obj, list):
        for k, v in enumerate(obj):
            if not isinstance(v, (list, dict)):
                k = ''
            buildParams(prefix + "[" + str(k) + "]", v, add)
    else:
        add(prefix, obj)
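
# Sketch of the nested-parameter encoding above (jQuery-style param(); not
# from the original module): dicts become key[subkey]=..., lists key[]=...
def _demo_urlencode_object():
    assert urlencode_object({'a': {'b': 'c'}}) == 'a%5Bb%5D=c'                    # a[b]=c
    assert urlencode_object({'tags': ['x', 'y']}) == 'tags%5B%5D=x&tags%5B%5D=y'  # tags[]=x&tags[]=y
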
def int_to_ip(s, byteorder = '<'):
    '''
    Turns an int (or a string holding an int) into a dotted IP address.

    ex: int_to_ip('580916604') --> '124.21.160.34'

    The default byteorder is little-endian (as used by MSN).
    '''
    return '.'.join(str(ord(c)) for c in struct.pack(byteorder + 'I', int(s)))


# matches two or more spaces in a row
spacify_pattern = re.compile('( {2,})')


def spacify_repl(m):
    l = len(m.group())
    if l == 2:
        return '  ' # "word joiner" unicode character is &#2060; but we're not using that anymore.
    else:
        # for more than two spaces use <SPACE><series of nbsp;s><SPACE>
        return ' ' + ''.join(['&nbsp;'] * (l - 2)) + ' '


def spacify(s):
    'Turns consecutive spaces into a series of &nbsp; entities.'
    return spacify_pattern.sub(spacify_repl, s)
#
# a url for matching regexes
#
urlregex = re.compile(r'([A-Za-z][A-Za-z0-9+.-]{1,120}:[A-Za-z0-9/]'
                      '(([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2}){1,333}'
                      "(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*,;/?:@&~=%-']{0,1000}))?)"
                      r'[^\. <]')

#
# thanks http://en.wikipedia.org/wiki/List_of_Internet_top-level_domains
#
TLDs = \
    ['arpa', 'root', 'aero', 'asia', 'biz', 'com', 'coop', 'edu', 'gov', 'info', 'int',
     'museum', 'name', 'net', 'org', 'pro', 'ac', 'ad', 'ae', 'af', 'ag', 'ai', 'al', 'am',
     'an', 'ao', 'aq', 'ar', 'as', 'at', 'au', 'aw', 'ax', 'az', 'ba', 'bb', 'bd',
     'be', 'bf', 'bg', 'bh', 'bi', 'bj', 'bm', 'bn', 'bo', 'br', 'bs', 'bt', 'bv',
     'bw', 'by', 'bz', 'ca', 'cc', 'cd', 'cf', 'cg', 'ch', 'ci', 'ck', 'cl', 'cm',
     'cn', 'co', 'cr', 'cu', 'cv', 'cx', 'cy', 'cz', 'de', 'dj', 'dk', 'dm', 'do',
     'dz', 'ec', 'ee', 'eg', 'er', 'es', 'et', 'eu', 'fi', 'fj', 'fk', 'fm', 'fo',
     'fr', 'ga', 'gb', 'gd', 'ge', 'gf', 'gg', 'gh', 'gi', 'gl', 'gm', 'gn', 'gp',
     'gq', 'gr', 'gs', 'gt', 'gu', 'gw', 'gy', 'hk', 'hm', 'hn', 'hr', 'ht', 'hu',
     'id', 'ie', 'il', 'im', 'in', 'io', 'iq', 'ir', 'is', 'it', 'je', 'jm', 'jo',
     'jp', 'ke', 'kg', 'kh', 'ki', 'km', 'kn', 'kp', 'kr', 'kw', 'ky', 'kz', 'la',
     'lb', 'lc', 'li', 'lk', 'lr', 'ls', 'lt', 'lu', 'lv', 'ly', 'ma', 'mc', 'md',
     'me', 'mg', 'mh', 'mk', 'ml', 'mm', 'mn', 'mo', 'mp', 'mq', 'mr', 'ms', 'mt',
     'mu', 'mv', 'mw', 'mx', 'my', 'mz', 'na', 'nc', 'ne', 'nf', 'ng', 'ni', 'nl',
     'no', 'np', 'nr', 'nu', 'nz', 'om', 'pa', 'pe', 'pf', 'pg', 'ph', 'pk', 'pl',
     'pm', 'pn', 'pr', 'ps', 'pt', 'pw', 'py', 'qa', 're', 'ro', 'rs', 'ru', 'rw',
     'sa', 'sb', 'sc', 'sd', 'se', 'sg', 'sh', 'si', 'sj', 'sk', 'sl', 'sm', 'sn',
     'so', 'sr', 'st', 'su', 'sv', 'sy', 'sz', 'tc', 'td', 'tf', 'tg', 'th', 'tj',
     'tk', 'tl', 'tm', 'tn', 'to', 'tp', 'tr', 'tt', 'tv', 'tw', 'tz', 'ua', 'ug',
     'uk', 'um', 'us', 'uy', 'uz', 'va', 'vc', 've', 'vg', 'vi', 'vn', 'vu', 'wf',
     'ws', 'ye', 'yt', 'yu', 'za', 'zm', 'zw'
     ]

domains = '(?:%s)' % '|'.join(TLDs)

'''
the one true email regex(tm)

complicated case and simple case:
    this_email-user.name+mylabel@bbc.co.uk
    name@domain
'''
email_regex_string = r'(?:([a-zA-Z0-9_][a-zA-Z0-9_\-\.]*)(\+[a-zA-Z0-9_\-\.]+)?@((?:[a-zA-Z0-9\-_]+\.?)*[a-zA-Z]{1,4}))'
email_regex = re.compile(email_regex_string)
email_wholestring_regex = re.compile('^' + email_regex_string + '$')

is_email = primitives.funcs.ischeck(lambda s: bool(email_wholestring_regex.match(s)))
class EmailAddress(tuple):
    def __new__(cls, addr, default_domain = sentinel):
        try:
            name, label, domain = parse_email(addr)
        except:
            if default_domain is sentinel:
                raise
            else:
                name, label, domain = parse_email(addr + '@' + default_domain)

        return tuple.__new__(cls, (name, label, domain.lower()))

    @property
    def name(self):
        return self[0]

    @property
    def label(self):
        return self[1]

    @property
    def domain(self):
        return self[2]

    def __str__(self):
        if self.label:
            return '%s+%s@%s' % self
        else:
            return '%s@%s' % (self.name, self.domain)

    def __repr__(self):
        return '<EmailAddress %s>' % (self,)


def parse_email(s):
    match = email_wholestring_regex.match(s)
    if match is None:
        raise ValueError('Not a valid email address: %r' % (s,))

    user, lbl, dom = match.groups()

    if lbl:
        lbl = lbl.strip('+')
    else:
        lbl = ''

    return user, lbl, dom
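
# Illustrative sketch of EmailAddress (not from the original module); the
# addresses are made up.
def _demo_email_address():
    addr = EmailAddress('user+work@Example.COM')
    assert (addr.name, addr.label, addr.domain) == ('user', 'work', 'example.com')
    assert str(addr) == 'user+work@example.com'
    # default_domain kicks in when the string has no @domain part
    assert str(EmailAddress('user', default_domain = 'example.com')) == 'user@example.com'
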
protocols = r'ftp|https?|gopher|msnim|icq|telnet|nntp|aim|file|svn|svn\+(?:\w)+'

# for these TLDs, only a few single-letter second-level domains have ever
# been allowed to be registered.
single_letter_rule_tlds = frozenset(('net', 'com', 'org'))
allowed_single_letter_domains = frozenset(('i.net', 'q.com', 'q.net', 'x.com', 'x.org', 'z.com'))

linkify_url_pattern = re.compile(
    # thanks textile
    r'''(?=[a-zA-Z0-9])                        # Must start correctly
    ((?:                                       # Match the leading part (proto://hostname, or just hostname)
    (?:(?P<protocol>%s)                        # protocol
    ://                                        # ://
    (?:                                        # Optional 'username:password@'
    (?P<username>\w+)                          # username
    (?::(?P<password>\w+))?                    # optional :password
    @                                          # @
    )?)?                                       #
    (?P<hostname>                              # hostname (sub.example.com). single-letter
    (?:[iqxz]|(?:[-\w\x7F-\xFF]+))             # domains are not allowed, except those listed:
    (?:\.[\w\x7F-\xFF][-\w\x7F-\xFF]*)*)       # http://en.wikipedia.org/wiki/Single-letter_second-level_domains
    )?                                         #
    (?::(?P<port>\d+))?                        # Optional port number
    (?P<selector>
    (?:                                        # Rest of the URL, optional
    /?                                         # Start with '/'
    [^.!,?;:"<>\[\]{}\s\x7F-\xFF]+             # Can't start with these
    (?:                                        #
    [.!,?;:]+                                  # One or more of these
    [^.!,?;:"<>{}\s\x7F-\xFF]+                 # Can't finish with these
    #'"                                        # # or ' or "
    )*)                                        #
    )?)                                        #
    ''' % protocols, re.VERBOSE)
def isurl(text):
    m = linkify_url_pattern.match(text)
    if not m:
        return False

    protocol, host = m.group('protocol'), m.group('hostname')

    if host is not None:
        # only allow links without protocols (i.e., www.links.com)
        # if the TLD is one of the allowed ones
        if protocol is None:
            myTLDs = (host.split('.') if '.' in host else [host])
            if len(myTLDs) < 2 or myTLDs[-1] not in TLDs:
                return False

    return True
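
# Quick sanity sketch of isurl (not from the original module).
def _demo_isurl():
    assert isurl('http://example.com/path')
    assert isurl('www.example.com')      # no protocol, but a known TLD
    assert not isurl('notadomain')       # bare word without a known TLD
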
class LinkAccumulator(object):
    def __init__(self, s = None):
        self.links = []
        self.spans = []
        if s is not None:
            linkify_url_pattern.sub(self.repl, s)

    def repl(self, m):
        url, protocol, after = _url_from_match(m)
        if url is None:
            return ''

        href = ('http://' + url) if protocol is None else url
        self.links.append(href)
        self.spans.append(m.span())
        return ''

    def __iter__(self):
        return itertools.izip(self.links, self.spans)


def find_links(text):
    return LinkAccumulator(text).links
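
# Illustrative sketch of find_links (not from the original module); note that
# protocol-less links get an http:// prefix.
def _demo_find_links():
    assert find_links('go to www.example.com now') == ['http://www.example.com']
    assert find_links('see http://digsby.com') == ['http://digsby.com']
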
def _url_from_match(m):
    protocol, host = m.group('protocol'), m.group('hostname')
    url = m.group()

    # fix urls (surrounded by parens)
    after = ''
    if url.endswith(')') and '(' not in url:
        url = url[:-1]
        after = ')'

    if host is not None:
        myTLDs = (host.split('.') if '.' in host else [host])

        # only allow links without protocols (i.e., www.links.com)
        # if the TLD is one of the allowed ones
        if protocol is None:
            if len(myTLDs) < 2 or myTLDs[-1] not in TLDs:
                return None, None, None

        if len(myTLDs) >= 2:
            # don't allow single letter second level domains unless they are in the list
            # of allowed ones above
            second_level_domain = myTLDs[-2]
            top_level_domain = myTLDs[-1]
            if (len(second_level_domain) == 1
                    and '.'.join((second_level_domain, top_level_domain)) not in allowed_single_letter_domains
                    and top_level_domain in single_letter_rule_tlds):
                # "cancel" the replacement
                return None, None, None

        return url, protocol, after

    return None, None, None


def _dolinkify(text):
    def repl(m):
        url, protocol, after = _url_from_match(m)
        if url is None:
            i, j = m.span()
            return text[i:j]

        href = ('http://' + url) if protocol is None else url
        return '<a href="%s">%s</a>' % (href, url) + after # TODO: add 'target="_blank"' ?

    #text = email_regex.sub(r'<a href="mailto:\1">\1</a>', text)
    text = linkify_url_pattern.sub(repl, text)
    return text


def linkify(text):
    if isinstance(text, unicode):
        return _linkify(text.encode('utf-8')).decode('utf-8')
    else:
        return _linkify(text)


def _linkify(text):
    # Linkify URLs and emails.
    # If there is no HTML, do a simple search and replace.
    if not re.search(r'''<.*>''', text):
        return _dolinkify(text)

    # Else split the text into an array at <>.
    else:
        lines = []
        prev_line = ''
        for line in re.split('(<.*?>)', text):
            if not re.match('<.*?>', line) and not prev_line.startswith('<a'):
                line = _dolinkify(line)
            prev_line = line
            lines.append(line)

        return ''.join(lines)
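
# Quick sanity sketch of linkify (not from the original module).
def _demo_linkify():
    assert (linkify('visit www.example.com!')
            == 'visit <a href="http://www.example.com">www.example.com</a>!')
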
class QueueableMixin(object):
    def __init__(self):
        object.__init__(self)
        self._on_queue = False

    def queue(self):
        # Adds this object to its socket's queue
        # if it's not already there.
        if not self._on_queue:
            self._queue()
            self._on_queue = True

    def unqueue(self):
        if self._on_queue:
            self._unqueue()
            self._on_queue = False


class ProducerQueuable(QueueableMixin):
    def __init__(self, sck):
        QueueableMixin.__init__(self)
        self.sck = sck

    def _queue(self):
        self.sck.push_with_producer(self)

    def _unqueue(self):
        try:
            self.sck.producer_fifo.remove(self)
        except ValueError:
            pass


class RoundRobinProducer(ProducerQueuable):
    def __init__(self, sck):
        ProducerQueuable.__init__(self, sck)
        self.list = []

    def add(self, prod):
        # Adds a producer to our list, and checks that it has
        # a callable "more" attribute.
        try:
            if not callable(prod.more):
                raise AssertionError('Producers must have a "more" method')
        except:
            traceback.print_exc()
            raise

        self.unqueue()
        self.list.append(prod)
        self.queue()

    def more(self):
        # If this is getting called, we *must* be on the queue
        self._on_queue = True

        d = None
        l = self.list
        prod = None
        while (not d) and l:
            prod = l.pop(0)
            d = prod.more()
            # if we got data, loop will break
            # if not, the "bad" producer is gone
        # end loop

        if d:
            # that producer is still good -- put it on the end.
            l.append(prod)
        else:
            # Didn't get any data. We're going to be removed
            # from the socket FIFO
            # if prod is None and not self.list:
            #     print 'List of producers was empty, returning None'
            # else:
            #     print 'Didn\'t get any data from %r' % prod
            self.unqueue()
            if self.list:
                self.queue()

        return d


class PriorityProducer(ProducerQueuable):
    def __init__(self, sck):
        ProducerQueuable.__init__(self, sck)
        self.high = []
        self.mid = []
        self.low = []

    def add(self, prod, pri = 'mid'):
        assert callable(prod.more)
        assert pri in ('high', 'mid', 'low')

        self.unqueue()
        getattr(self, pri).append(prod)
        self.queue()

    def more(self):
        # If this is getting called, we *must* be on the queue
        self._on_queue = True

        d = None
        for l in (self.high, self.mid, self.low):
            if not l:
                continue

            while not d and l:
                prod = l.pop(0)
                d = prod.more()

            if d:
                # Put the producer back where we got it
                l.insert(0, prod)
                break

        # if not d:
        #     self.unqueue()
        return d
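
# Minimal sketch of PriorityProducer (not from the original module).
# _FakeSocket and _ListProducer are hypothetical stand-ins for an asynchat
# channel and a real producer.
class _FakeSocket(object):
    def __init__(self):
        self.producer_fifo = []

    def push_with_producer(self, p):
        self.producer_fifo.append(p)


class _ListProducer(object):
    def __init__(self, chunks):
        self.chunks = list(chunks)

    def more(self):
        return self.chunks.pop(0) if self.chunks else None


def _demo_priority_producer():
    pp = PriorityProducer(_FakeSocket())
    pp.add(_ListProducer(['low']), pri = 'low')
    pp.add(_ListProducer(['high']), pri = 'high')
    assert pp.more() == 'high'   # high-priority data drains first
    assert pp.more() == 'low'
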
class HTTPConnProgress(HTTPConnection):
    'Subclass of HTTPConnection which sends a file object, reporting progress.'

    def send_file_cb(self, fileobj, progress_cb, blocksize = default_chunksize, progressDelta = 0):
        'Sends the contents of fileobj (a .read-able object) to the server.'
        if self.sock is None:
            if self.auto_open:
                self.connect()
            else:
                raise NotConnected()

        if self.debuglevel > 0:
            print "sending contents of", fileobj

        try:
            read = fileobj.read
            sendall = self.sock.sendall

            chunk = read(blocksize)
            total = 0
            while chunk:
                total += len(chunk)
                sendall(chunk)
                progress_cb(total - progressDelta)
                chunk = read(blocksize)
        except socket.error, v:
            if v[0] == 32: # Broken pipe
                self.close()
            raise
class SocketEventMixin(EventMixin):
    events = EventMixin.events | set(("connected",
                                      "connection_failed",
                                      "socket_error",
                                      "socket_closed",
                                      ))

    def post_connect_error(self, e = None):
        self.event("socket_error")
        self.post_connect_disconnect()

    def post_connect_expt(self):
        self.event("socket_error")
        self.post_connect_disconnect()

    def post_connect_disconnect(self):
        self.close()
        self.event("socket_closed")

    def post_connect_close(self):
        self.close()
        self.event("socket_closed")

    def reassign(self):
        self.handle_expt = self.post_connect_expt
        self.handle_error = self.post_connect_error
        self.handle_close = self.post_connect_close
        self.do_disconnect = self.post_connect_disconnect
def build_cookie(name, value,
                 version = 0,
                 domain = sentinel,
                 port = sentinel,
                 path = sentinel,
                 secure = False,
                 expires = None,
                 discard = False,
                 comment = None,
                 comment_url = None,
                 rest = {'httponly': None},
                 rfc2109 = False):
    if domain is sentinel:
        domain = None
        domain_specified = False
        domain_initial_dot = False
    else:
        domain_specified = True
        domain_initial_dot = domain.startswith('.')

    if port is sentinel:
        port = None
        port_specified = False
    else:
        port_specified = True

    if path is sentinel:
        path = None
        path_specified = False
    else:
        path_specified = True

    # locals() now holds exactly the keyword arguments cookielib.Cookie expects.
    return cookielib.Cookie(**locals())
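
# Illustrative sketch of build_cookie (not from the original module); the
# cookie name, value, and domain are made up.
def _demo_build_cookie():
    c = build_cookie('sessionid', 'abc123', domain = '.example.com', path = '/')
    jar = cookielib.CookieJar()
    jar.set_cookie(c)
    assert c.domain == '.example.com' and c.domain_initial_dot
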
def GetSocketType():
    d = GetProxyInfo()
    if d:
        #socks.setdefaultproxy(**GetProxyInfo())
        return socks.socksocket
    else:
        return socket.socket

NONE = 'NONPROX'
SYSDEFAULT = 'SYSPROX'
CUSTOM = 'SETPROX'


def GetProxyInfo():
    ps = proxy_settings
    try:
        pd = ps.get_proxy_dict()
    except Exception, e:
        print >>sys.stderr, 'No proxies because: %r' % e
        pd = {}

    get = pd.get

    proxytype = get('proxytype')
    port = get('port')
    try:
        port = int(port)
    except:
        port = None

    addr = get('addr')
    username = get('username')
    password = get('password')
    override = get('override')
    rdns = get('rdns', False)

    try:
        override = int(override)
    except:
        if override not in (SYSDEFAULT, CUSTOM, NONE):
            override = SYSDEFAULT
    else:
        # Hack for old proxy code that only had 2 options
        if override:
            override = CUSTOM
        else:
            override = SYSDEFAULT

    if override == NONE:
        return {}
    elif override == SYSDEFAULT:
        px = urllib._getproxies()
        if not px:
            return {}

        url = px.get('http', None)
        if url is None:
            return {}

        url = urlparse.urlparse(url)
        addr = url.hostname or ''
        if not addr:
            return {}

        port = url.port or 80
        username = url.username or username or None
        password = url.password or password or None
        proxytype = 'http'

    # d = {}
    # if all((addr, port)):
    #     d.update(addr=addr, port=port, proxytype=proxytype)
    #
    # if all((username, password)):
    #     d.update(username=username, password=password)

    if all((proxytype, port, addr)):
        proxytype = getattr(socks, ('proxy_type_%s' % proxytype).upper(), None)
        return dict(addr = addr, port = port, username = username, password = password, proxytype = proxytype, rdns = rdns)
    else:
        return {}
def GetProxyInfoHttp2():
    i = GetProxyInfo()
    if not i:
        return None

    return httplib2.ProxyInfo(proxy_type = i['proxytype'],
                              proxy_host = i['addr'],
                              proxy_port = i['port'],
                              proxy_user = i['username'],
                              proxy_pass = i['password'],
                              proxy_rdns = i.get('rdns', False),
                              )


def getproxies_digsby():
    '''
    A replacement for urllib's getproxies that returns the Digsby app settings.

    The return value is a dictionary with key:val like:

        'http' : 'http://user:pass@proxyhost:proxyport'

    The dictionary can be empty, indicating that there are no proxy settings.
    '''
    pinfo = GetProxyInfo()
    proxies = {}

    if pinfo.get('username', None) and pinfo.get('password', None):
        unpw = '%s:%s@' % (pinfo['username'], pinfo['password'])
    else:
        unpw = ''

    if pinfo.get('port', None):
        port = ':' + str(pinfo['port'])
    else:
        port = ''

    host = pinfo.get('addr', None)
    if not host:
        return proxies # empty dict

    all = unpw + host + port

    proxies = urllib.OneProxy()
    proxies._proxyServer = all

    if pinfo['proxytype'] != socks.PROXY_TYPE_HTTP:
        proxy_url = ('socks%d://' % (4 if pinfo['proxytype'] == socks.PROXY_TYPE_SOCKS4 else 5)) + all
        return dict(socks = proxy_url, http = proxy_url, https = proxy_url)

    proxies['https'] = 'http://' + all
    proxies['http'] = 'http://' + all
    proxies['ftp'] = 'http://' + all
    return proxies
class SocksProxyHandler(urllib2.ProxyHandler):
    '''
    Handles SOCKS4/5 proxies as well as HTTP proxies.
    '''
    handler_order = 100

    def proxy_open(self, req, type):
        try:
            req._proxied
        except AttributeError:
            proxyinfo = self.proxies.get(type, '')
            proxytype = urllib2._parse_proxy(proxyinfo)[0]
            if proxytype is None:
                req._proxied = False
                return urllib2.ProxyHandler.proxy_open(self, req, type)
            else:
                req._proxytype = proxytype
                req._proxied = True
                if proxytype == 'http' and type != 'https': # HTTP proxy
                    return urllib2.ProxyHandler.proxy_open(self, req, type)
                else:
                    return None
        else:
            # Already proxied. skip it.
            return None

    def socks4_open(self, req):
        return self.socks_open(req, 4)

    def socks5_open(self, req):
        return self.socks_open(req, 5)

    def socks_open(self, req, sockstype):
        orig_url_type, __, __, orighostport = urllib2._parse_proxy(req.get_full_url())
        req.set_proxy(orighostport, orig_url_type)

        endpoint = req.get_host()
        if ':' in endpoint:
            host, port = endpoint.rsplit(':', 1)
            port = int(port)
        else:
            host, port = endpoint, 80

        req._proxied = True

        return self.parent.open(req)
try:
    import ssl
except ImportError:
    pass
else:
    class SocksHttpsOpener(urllib2.HTTPSHandler):
        handler_order = 101

        def https_open(self, req):
            if getattr(req, '_proxied', False) and getattr(req, '_proxytype', None) is not None:
                return urllib2.HTTPSHandler.do_open(self, SocksHttpsConnection, req)
            else:
                return urllib2.HTTPSHandler.https_open(self, req)

    class SocksHttpsConnection(httplib.HTTPSConnection):
        _sockettype = socks.socksocket

        def connect(self):
            "Connect to a host on a given (SSL) port."
            pd = urllib.getproxies().get('https', None)
            if pd is None:
                sockstype = ''
            else:
                sockstype, user, password, hostport = urllib2._parse_proxy(pd)

            assert ':' in hostport # if we don't have a port we're screwed
            host, port = hostport.rsplit(':', 1)
            port = int(port)

            sock = self._sockettype(socket.AF_INET, socket.SOCK_STREAM)
            sock.setproxy(proxytype = getattr(socks, 'PROXY_TYPE_%s' % sockstype.upper()),
                          addr = host, port = port, rdns = True, username = user, password = password)
            sock.connect((self.host, self.port))
            self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file)


class SocksHttpOpener(urllib2.HTTPHandler):
    handler_order = 101

    def http_open(self, req):
        proxytype = getattr(req, '_proxytype', None)
        if getattr(req, '_proxied', False) and proxytype not in ('http', None):
            return urllib2.HTTPHandler.do_open(self, SocksConnection, req)
        else:
            return urllib2.HTTPHandler.http_open(self, req)


class SocksConnection(httplib.HTTPConnection):
    _sockettype = socks.socksocket

    def connect(self):
        #- Parse proxies
        pd = urllib.getproxies().get('http', None)
        if pd is None:
            sockstype = ''
        else:
            sockstype, user, password, hostport = urllib2._parse_proxy(pd)

        if 'socks' not in sockstype:
            return httplib.HTTPConnection.connect(self)

        assert ':' in hostport # if we don't have a port we're screwed
        host, port = hostport.rsplit(':', 1)
        port = int(port)

        for res in socket.getaddrinfo(self.host, self.port, 0, socket.SOCK_STREAM):
            af, socktype, proto, canonname, sa = res
            try:
                self.sock = self._sockettype(af, socktype, proto)
                self.sock.setproxy(proxytype = getattr(socks, 'PROXY_TYPE_%s' % sockstype.upper()),
                                   addr = host, port = port, rdns = False, username = user, password = password)
                #- The rest is the same as the superclass
                if self.debuglevel > 0:
                    print "connect: (%s, %s)" % (self.host, self.port)
                self.sock.connect(sa)
            except socket.error, msg:
                if self.debuglevel > 0:
                    print 'connect fail:', (self.host, self.port)
                if self.sock:
                    self.sock.close()
                self.sock = None
                continue
            break

        if not self.sock:
            raise socket.error, msg
class DigsbyHttpProxyPasswordManager(urllib2.HTTPPasswordMgr):
    def find_user_password(self, realm, uri):
        pi = GetProxyInfo()
        return (pi['username'] or None), (pi['password'] or None)

if not hasattr(urllib, '_getproxies'):
    urllib._getproxies, urllib.getproxies = urllib.getproxies, getproxies_digsby

urllib2.UnknownHandler.handler_order = sys.maxint # This should be last, no matter what

# BaseHandler comes first in the superclass list, but it doesn't take any arguments.
# This causes problems when initializing the class with an argument.
urllib2.ProxyDigestAuthHandler.__bases__ = urllib2.ProxyDigestAuthHandler.__bases__[::-1]

urllib2.ProxyBasicAuthHandler.handler_order = 499 # Make sure it comes before the 'default' error handler

httplib2.ProxyInfo.get_default_proxy = staticmethod(GetProxyInfoHttp2)
def GetDefaultHandlers():
    handlers = [SocksProxyHandler, SocksHttpOpener]

    httpsopener = globals().get('SocksHttpsOpener', None)
    if httpsopener is not None:
        handlers.append(httpsopener)

    pwdmgr = DigsbyHttpProxyPasswordManager()

    for auth_handler_type in (urllib2.ProxyBasicAuthHandler, urllib2.ProxyDigestAuthHandler):
        handlers.append(auth_handler_type(pwdmgr))

    return handlers


def build_opener(*a, **k):
    if 'default_classes' not in k:
        k['default_classes'] = GetDefaultHandlers() + urllib2.default_opener_classes

    return urllib2.build_opener(*a, **k)

opener = urllib2.build_opener(*GetDefaultHandlers())
# for handler in opener.handlers:
#     handler._debuglevel = 1
urllib2.install_opener(opener)
_hostprog = re.compile('^//([^/?]*)(.*)$')

def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
    match = _hostprog.match(url)
    if match:
        groups = match.group(1, 2)
        # Check if we're throwing out a slash accidentally. If so, put it back and return.
        if groups[0] == '':
            return groups[0], '/' + groups[1]
        else:
            return groups
    return None, url

urllib.splithost = urllib2.splithost = splithost # urllib2 imports it "from" urllib, so we have to replace its copy as well
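
# Quick sanity sketch of the patched splithost (not from the original module).
def _demo_splithost():
    assert splithost('//example.com:8080/a/b') == ('example.com:8080', '/a/b')
    assert splithost('relative/path') == (None, 'relative/path')
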
def _HTTPError__repr(self):
    if not hasattr(self, 'content'):
        try:
            self.content = self.read()
            self.close()
        except Exception, e:
            self._error = e
            self.content = "error reading body: %r" % e
            self.read = lambda: ''
        else:
            self._stringio = StringIO.StringIO(self.content)
            self.read = self._stringio.read

    etxt = self.content
    return '<HTTPError headers = %r, body = %r>' % (str(getattr(self, 'hdrs', {})), etxt)

urllib2.HTTPError.__repr__ = _HTTPError__repr


def httpok(_code):
    return getattr(_code, 'status', _code) // 100 == 2
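
# Quick sanity sketch of httpok (not from the original module); _Resp is a
# hypothetical stand-in for any object with a .status attribute.
def _demo_httpok():
    assert httpok(200) and httpok(204)
    assert not httpok(404)

    class _Resp(object):
        status = 201

    assert httpok(_Resp())
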
class SimpleProducer(asynchat.simple_producer):
    def more(self):
        data = asynchat.simple_producer.more(self)
        if data == '':
            data = None
        return data


class CallbackProducerMixin(object):
    '''
    Simple mixin for producer classes that calls callback.success() when all
    data has been read from it (via more()).

    Must be listed before the producer type in the inheritance list, for
    method resolution to work correctly.
    '''
    def __init__(self):
        bases = self.__class__.__bases__
        found_self = False
        self._siblingClass = None

        for base in bases:
            if base is CallbackProducerMixin:
                found_self = True
            else:
                if hasattr(base, 'more') and found_self:
                    self._siblingClass = base
                    break

        if self._siblingClass is None:
            raise AssertionError("This mix-in requires there is a sibling class with a 'more' method. "
                                 "Additionally, CallbackProducerMixin must be *before* that class in the inheritance list "
                                 "(for method resolution reasons).")

    @callsback
    def set_callback(self, callback = None):
        self._callback = callback

    def more(self):
        if not hasattr(self, '_siblingClass'):
            result = None
        else:
            result = self._siblingClass.more(self)

        if result is None:
            if getattr(self, '_callback', None) is not None:
                self._callback.success()
            if getattr(self, '_callback', None) is not None:
                del self._callback

        return result


class SimpleCallbackProducer(CallbackProducerMixin, SimpleProducer):
    '''
    Subclass of asynchat.simple_producer that calls self._callback.success()
    when all data has been exhausted. Set the callback after instantiation
    with the set_callback() method.

    SimpleCallbackProducer(data, buffer_size=512)
    '''
    def __init__(self, data):
        SimpleProducer.__init__(self, data)
        CallbackProducerMixin.__init__(self)


## Add a 'remove' method to asynchat's fifo
def _fifo_remove(self, val):
    '''
    Returns True if the value was found and removed, False otherwise.
    '''
    # self.list is a deque
    try:
        self.list.remove(val)
    except Exception:
        return False
    else:
        return True

asynchat.fifo.remove = _fifo_remove


@callsback
def producer_cb(data, callback = None):
    '''
    producer(data, success=callable)

    Facade for SimpleCallbackProducer.
    '''
    prod = SimpleCallbackProducer(data)
    prod.set_callback(callback = callback)
    return prod


class GeneratorProducer(object):
    def __init__(self, gen):
        self.gen = gen

    def more(self):
        if self.gen is None:
            return None

        try:
            return self.gen.next()
        except StopIteration:
            self.gen = None
            return None
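
# Quick sanity sketch of GeneratorProducer (not from the original module).
def _demo_generator_producer():
    gp = GeneratorProducer(iter(['chunk1', 'chunk2']))
    assert gp.more() == 'chunk1'
    assert gp.more() == 'chunk2'
    assert gp.more() is None    # exhausted: returns None from then on
    assert gp.more() is None
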
# from tr.im:
_short_domains = frozenset((
    '2big.at', '2me.tw', '3.ly', 'a.gd', 'a2n.eu', 'abbrr.com',
    'adjix.com', 'arunaurl.com', 'beam.to', 'bit.ly',
    'bitly.com', 'bkite.com', 'blip.fm', 'bloat.me',
    'budurl.com', 'burnurl.com', 'canurl.com', 'chilp.it',
    'cli.gs', 'decenturl.com', 'digg.com', 'digs.by', 'dn.vc',
    'doiop.com', 'durl.us', 'dwarfurl.com', 'easyuri.com',
    'easyurl.net', 'ff.im', 'fon.gs', 'fyiurl.com', 'ginx.com',
    'goo.gl', 'go2.me', 'hex.io', 'hopurl.com', 'hurl.ws', 'icanhaz.com',
    'idek.net', 'is.gd', 'ix.it', 'jijr.com', 'jmp2.net',
    'knol.me', 'krz.ch', 'kurl.us', 'last.fm', 'lin.cr',
    'lnk.in', 'makeitbrief.com', 'memurl.com', 'micurl.com',
    'minu.ws', 'moourl.com', 'myturl.com', 'notlong.com', 'ow.ly',
    'pic.im', 'pikchur.com', 'ping.fm', 'piurl.com', 'poprl.com',
    'qurlyq.com', 'r.im', 'refurl.com', 'rubyurl.com', 'rurl.org',
    'rurl.us', 's7y.us', 'sai.ly', 'sbt.sh', 'shorl.com',
    'short.ie', 'short.to', 'shortna.me', 'shrinkify.com',
    'shw.com', 'si9.org', 'skocz.pl', 'smalur.com', 'sn.im',
    'snipr.com', 'snipurl.com', 'snurl.com', 'spedr.com',
    'starturl.com', 'three.ly', 'timesurl.at', 'tiny.cc', 'tiny.pl',
    'tinyarro.ws', 'tinylink.co.za', 'tinyuri.ca', 'tinyurl.com',
    'tnij.org', 'tr.im', 'turo.us', 'twitclicks.com', 'twitpic.com',
    'twt.fm', 'twurl.cc', 'twurl.nl', 'u.nu', 'ub0.cc', 'uris.jp',
    'urlb.at', 'urlcut.com', 'urlenco.de', 'urlhawk.com',
    'urltea.com', 'vieurl.com', 'w3t.org', 'x.se', 'xaddr.com',
    'xr.com', 'xrl.us', 'yep.it', 'zi.ma', 'zombieurl.com', 'zz.gd'))


def is_short_url(url, domains = _short_domains):
    parsed = urlparse.urlparse(url)
    if parsed.netloc in domains:
        return True
    return False
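
# Quick sanity sketch of is_short_url (not from the original module).
def _demo_is_short_url():
    assert is_short_url('http://tinyurl.com/abc123')
    assert not is_short_url('http://example.com/page')
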
def get_snurl(url):
    return get_short_url(url, 'snurl')

def get_isgd(url):
    return get_short_url(url, 'isgd')

def get_tinyurl(url):
    return get_short_url(url, 'tinyurl')


class UrlShortenerException(Exception):
    pass

from .lrucache import LRU
_short_url_cache = LRU(10)

def cache_shortened_url(url, short_url):
    if short_url:
        _short_url_cache[short_url] = url
class UrlShortener(object):
    endpoint = None

    def build_request_url(self, url):
        return UrlQuery(self.endpoint, d = self.get_args(url.encode('utf-8')))

    def shorten(self, url):
        try:
            resp = urllib2.urlopen(self.build_request_url(url))
        except urllib2.HTTPError, e:
            resp = e

        short_url = self.process_response(resp)
        cache_shortened_url(url, short_url)
        return short_url

    def shorten_async(self, url, success, error = None):
        def async_success(req, resp):
            try:
                ret = self.process_response(resp)
            except Exception as e:
                if error is not None:
                    error(e)
            else:
                cache_shortened_url(url, ret)
                success(ret)

        def async_error(req = None, resp = None):
            print req
            print resp
            if error is not None:
                error(None) # TODO: async interface for errors?

        import common.asynchttp as asynchttp
        asynchttp.httpopen(self.build_request_url(url), success = async_success, error = async_error)

    def get_args(self, url):
        raise NotImplementedError

    def process_response(self, resp):
        if resp.code != 200:
            body = resp.read()
            raise UrlShortenerException(body)

        ret = resp.read()
        return ret
class ResponseIsResultShortener(UrlShortener):
    def process_response(self, resp):
        ret = UrlShortener.process_response(self, resp)
        if not isurl(ret):
            raise UrlShortenerException(ret)
        return ret
class isgd_shortener(ResponseIsResultShortener):
    endpoint = 'http://is.gd/api.php'

    def get_args(self, url):
        return dict(longurl = url)


class tinyurl_shortener(ResponseIsResultShortener):
    endpoint = 'http://tinyurl.com/api-create.php'

    def get_args(self, url):
        return dict(url = url)


class threely_shortener(UrlShortener):
    endpoint = 'http://3.ly/'

    def get_args(self, url):
        return dict(api = 'em5893833',
                    u = url)

    def process_response(self, resp):
        ret = UrlShortener.process_response(self, resp)
        if not ret.startswith(self.endpoint):
            raise UrlShortenerException(ret)
        return ret


class snipr_shortener(UrlShortener):
    endpoint = 'http://snipr.com/site/snip'

    def get_args(self, url):
        return dict(r = 'simple', link = url.encode('url'))

    def process_response(self, resp):
        ret = UrlShortener.process_response(self, resp)
        if not ret.startswith('http'):
            raise UrlShortenerException('bad url: %r' % ret, ret)
        return ret


class shortname_shortener(UrlShortener):
    endpoint = 'http://shortna.me/hash/'

    def get_args(self, url):
        return dict(snURL = url, api = 0)

    def process_response(self, resp):
        ret = UrlShortener.process_response(self, resp)

        import lxml.html as HTML
        doc = HTML.fromstring(ret)
        links = doc.findall('a')
        for link in links:
            href = link.attrib.get('href')
            if href is not None and href.startswith('http://shortna.me/') and href != 'http://shortna.me':
                return href

        raise UrlShortenerException('short link not found in %r' % ret, ret)


# not currently used
class digsby_shortener(UrlShortener):
    endpoint = 'https://accounts.digsby.com/api/shorturl'

    def get_args(self, url):
        import common
        import hashlib
        username = common.profile.username.encode('utf8')
        password = hashlib.sha256(common.profile.password.encode('utf8')).digest()

        return {'user': username, 'pass': password, 'link': url}

    def process_response(self, httpresp):
        ret = UrlShortener.process_response(self, httpresp)
        resp = simplejson.loads(ret)

        if resp['shorter']['status'] == 'error':
            raise UrlShortenerException(resp['shorter']['errormsg'])
        elif resp['shorter']['status'] == 'ok':
            import common
            url = resp['shorter']['shortURL']
            to_add = common.pref('urlshorteners.digsby.append_text', type = unicode, default = u'')
            return url + to_add
class bitly_shortener(UrlShortener):
    login = 'digsby'
    api_key = 'R_1fdb0bb8ce9af01f9939c2ffdf391dc8'
    endpoint = 'http://api.bit.ly/shorten'

    def __init__(self, login = None, api_key = None):
        if login is not None:
            self.login = login
        if api_key is not None:
            self.api_key = api_key

    def get_args(self, url):
        return dict(longUrl = url, version = '2.0.1', login = self.login, apiKey = self.api_key)

    def process_response(self, resp):
        ret = UrlShortener.process_response(self, resp)
        try:
            info = simplejson.loads(ret)
        except Exception:
            raise UrlShortenerException('expected JSON')
        else:
            if info['errorCode'] == 0:
                return self.extract_shorturl(info)
            else:
                raise UrlShortenerException(info['errorMessage'])

    def extract_shorturl(self, info):
        return info['results'].values()[0]['shortUrl']


class digsby_bitly_shortener(bitly_shortener):
    def extract_shorturl(self, info):
        return "http://digs.by/" + info['results'].values()[0]['userHash']


# TODO: add bit.ly, cli.gs, tr.im
_shorteners = {
    #'snipr'   : snipr_shortener,
    #'snurl'   : snipr_shortener,
    #'snipurl' : snipr_shortener, # has a new API we didn't implement yet
    'isgd'      : isgd_shortener,
    'tinyurl'   : tinyurl_shortener,
    'tiny'      : tinyurl_shortener,
    'threely'   : threely_shortener,
    '3ly'       : threely_shortener,
    'shortname' : shortname_shortener,
    'digsby'    : digsby_bitly_shortener,
}
def get_short_url(url, provider = None, choices = None):
    """
    Gets a shortened URL from 'provider' through their API.

    Intended to be used with threaded:

        threaded(get_short_url)(url, 'tinyurl', success=func, error=func)

    @param url: The URL to be snipped
    @param provider: The shortening service to use.
    """
    if choices is None:
        choices = list(_shorteners.keys())

    choices = choices[:]
    random.shuffle(choices)

    if provider is not None:
        choices.append(provider)
    else:
        import common
        choices.append(common.pref("url_shortener.default", type = basestring, default = 'digsby'))

    e = None
    while choices:
        try:
            provider = choices.pop()
            shortener = _shorteners.get(provider)
            if shortener is None:
                raise Exception("UrlShortener provider %r not found" % (provider,))
            return shortener().shorten(url)
        # except UrlShortenerException, e:
        #     log.error('error getting short URL from %r: %r', provider, e)
        #     raise e
        except Exception, e:
            log.error('error getting short URL from %r: %r', provider, e)
            shortener = provider = None

    if e is None:
        e = Exception('No shorteners found!')

    # none of them worked
    raise e
def wget(url, data = None):
    '''
    return urllib2.urlopen(url, data).read()
    '''
    from contextlib import closing
    import urllib2

    with closing(urllib2.urlopen(url, data = data)) as web:
        return web.read()


def long_url_from_cache(shorturl):
    try:
        return _short_url_cache[shorturl]
    except KeyError:
        return None


def unshorten_url(url, cb):
    longurl = long_url_from_cache(url)
    if longurl is not None:
        return cb(longurl)

    requrl = UrlQuery('http://untiny.me/api/1.0/extract',
                      url = url, format = 'json')

    def success(req, resp):
        json = resp.read()
        unshortened_url = simplejson.loads(json)['org_url']
        cb(unshortened_url)

    def error(req, resp):
        pass

    import common.asynchttp as asynchttp
    return asynchttp.httpopen(requrl, success = success, error = error)
def timestamp_to_http_date(ts):
    return rfc822.formatdate(timeval = ts)


def http_date_to_timestamp(date_str):
    if date_str is None:
        return None

    return calendar.timegm(rfc822.parsedate(date_str))
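
# Quick round-trip sketch of the HTTP date helpers (not from the original
# module); the timestamp is arbitrary.
def _demo_http_dates():
    ts = 1234567890
    date_str = timestamp_to_http_date(ts)   # an RFC 822 date string in GMT
    assert http_date_to_timestamp(date_str) == ts
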
def user_agent():
    return 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.10) Gecko/20100914 Firefox/3.6.15'


if __name__ == '__main__':
    print get_snurl('http://www.google.com')