PageRenderTime 84ms CodeModel.GetById 38ms RepoModel.GetById 1ms app.codeStats 0ms

/assets/goagent.py

https://github.com/yourmoonlight/gaeproxy
Python | 2417 lines | 2293 code | 65 blank | 59 comment | 108 complexity | 2ee13670943693f3914cc4607be18fb3 MD5 | raw file
Possible License(s): GPL-3.0
  1. #!/usr/bin/env python
  2. # coding:utf-8
  3. # Based on GAppProxy 2.0.0 by Du XiaoGang <dugang.2008@gmail.com>
  4. # Based on WallProxy 0.4.0 by Hust Moon <www.ehust@gmail.com>
  5. # Contributor:
  6. # Phus Lu <phus.lu@gmail.com>
  7. # Hewig Xu <hewigovens@gmail.com>
  8. # Ayanamist Yang <ayanamist@gmail.com>
  9. # V.E.O <V.E.O@tom.com>
  10. # Max Lv <max.c.lv@gmail.com>
  11. # AlsoTang <alsotang@gmail.com>
  12. # Christopher Meng <i@cicku.me>
  13. # Yonsm Guo <YonsmGuo@gmail.com>
  14. # Parkman <cseparkman@gmail.com>
  15. # Ming Bai <mbbill@gmail.com>
  16. # Bin Yu <yubinlove1991@gmail.com>
  17. # lileixuan <lileixuan@gmail.com>
  18. # Cong Ding <cong@cding.org>
  19. # Zhang Youfu <zhangyoufu@gmail.com>
  20. # Lu Wei <luwei@barfoo>
  21. # Harmony Meow <harmony.meow@gmail.com>
  22. # logostream <logostream@gmail.com>
  23. # Rui Wang <isnowfy@gmail.com>
  24. # Wang Wei Qiang <wwqgtxx@gmail.com>
  25. # Felix Yan <felixonmars@gmail.com>
  26. # Sui Feng <suifeng.me@qq.com>
  27. # QXO <qxodream@gmail.com>
  28. # Geek An <geekan@foxmail.com>
  29. # Poly Rabbit <mcx_221@foxmail.com>
  30. # oxnz <yunxinyi@gmail.com>
  31. # Shusen Liu <liushusen.smart@gmail.com>
  32. # Yad Smood <y.s.inside@gmail.com>
  33. # Chen Shuang <cs0x7f@gmail.com>
  34. # cnfuyu <cnfuyu@gmail.com>
  35. # cuixin <steven.cuixin@gmail.com>
  36. # s2marine0 <s2marine0@gmail.com>
  37. # Toshio Xiang <snachx@gmail.com>
  38. __version__ = '3.1.5'
  39. import sys
  40. import os
  41. import glob
  42. # GAEProxy Patch
  43. # The sys path in Android is set up outside.
  44. try:
  45. import gevent
  46. import gevent.socket
  47. import gevent.server
  48. import gevent.queue
  49. import gevent.monkey
  50. gevent.monkey.patch_all(subprocess=True)
  51. except ImportError:
  52. gevent = None
  53. except TypeError:
  54. gevent.monkey.patch_all()
  55. sys.stderr.write('\033[31m Warning: Please update gevent to the latest 1.0 version!\033[0m\n')
  56. import errno
  57. import binascii
  58. import time
  59. import struct
  60. import collections
  61. import zlib
  62. import functools
  63. import itertools
  64. import re
  65. import io
  66. import fnmatch
  67. import traceback
  68. import random
  69. import subprocess
  70. import base64
  71. import string
  72. import hashlib
  73. import threading
  74. import thread
  75. import socket
  76. import ssl
  77. import select
  78. import Queue
  79. import SocketServer
  80. import ConfigParser
  81. import BaseHTTPServer
  82. import httplib
  83. import urllib2
  84. import urlparse
  85. try:
  86. import dnslib
  87. except ImportError:
  88. dnslib = None
  89. try:
  90. import OpenSSL
  91. except ImportError:
  92. OpenSSL = None
  93. try:
  94. import pacparser
  95. except ImportError:
  96. pacparser = None
  97. # GAEProxy Patch
  98. class NullDevice():
  99. def write(self, s):
  100. pass
# Route both standard streams to one shared NullDevice so that anything
# written to stdout/stderr from here on is discarded.
sys.stdout = NullDevice()
sys.stderr = sys.stdout
  103. class DNSCacheUtil(object):
  104. '''DNSCache module, integrated with GAEProxy'''
  105. cache = {"127.0.0.1": 'localhost'}
  106. @staticmethod
  107. def getHost(address):
  108. p = "(?:\d{1,3}\.){3}\d{1,3}"
  109. if re.match(p, address) is None:
  110. return
  111. if address in DNSCacheUtil.cache:
  112. return DNSCacheUtil.cache[address]
  113. host = None
  114. sock = None
  115. address_family = socket.AF_INET
  116. retry = 0
  117. while address not in DNSCacheUtil.cache:
  118. try:
  119. sock = socket.socket(family=address_family, type=socket.SOCK_STREAM)
  120. sock.settimeout(2)
  121. sock.connect(("127.0.0.1", 9090))
  122. sock.sendall(address + "\r\n")
  123. host = sock.recv(512)
  124. if host is not None and not host.startswith("null"):
  125. host = host.strip()
  126. DNSCacheUtil.cache[address] = host
  127. break
  128. else:
  129. if retry > 3:
  130. host = None
  131. break
  132. else:
  133. retry = retry + 1
  134. continue
  135. except socket.error as e:
  136. if e[0] in (10060, 'timed out'):
  137. continue
  138. except Exception, e:
  139. logging.error('reverse dns query exception: %s', e)
  140. break
  141. finally:
  142. if sock:
  143. sock.close()
  144. return host
# True when the interpreter exposes ``sys.pypy_version_info``.
HAS_PYPY = hasattr(sys, 'pypy_version_info')
# Tuple of exception classes grouped under one name (presumably so network
# failures can be caught with a single ``except`` clause -- verify against
# callers). OpenSSL.SSL.Error is included only when the optional OpenSSL
# import succeeded.
NetWorkIOError = (socket.error, ssl.SSLError, OSError) if not OpenSSL else (socket.error, ssl.SSLError, OpenSSL.SSL.Error, OSError)
  147. class Logging(type(sys)):
  148. CRITICAL = 50
  149. FATAL = CRITICAL
  150. ERROR = 40
  151. WARNING = 30
  152. WARN = WARNING
  153. INFO = 20
  154. DEBUG = 10
  155. NOTSET = 0
  156. def __init__(self, *args, **kwargs):
  157. self.level = self.__class__.INFO
  158. #GAEProxy Patch
  159. @classmethod
  160. def getLogger(cls, *args, **kwargs):
  161. return cls(*args, **kwargs)
  162. def basicConfig(self, *args, **kwargs):
  163. self.level = int(kwargs.get('level', self.__class__.INFO))
  164. if self.level > self.__class__.DEBUG:
  165. self.debug = self.dummy
  166. def log(self, level, fmt, *args, **kwargs):
  167. sys.stderr.write('%s - [%s] %s\n' % (level, time.ctime()[4:-5], fmt % args))
  168. def dummy(self, *args, **kwargs):
  169. pass
  170. def debug(self, fmt, *args, **kwargs):
  171. self.log('DEBUG', fmt, *args, **kwargs)
  172. def info(self, fmt, *args, **kwargs):
  173. self.log('INFO', fmt, *args)
  174. def warning(self, fmt, *args, **kwargs):
  175. self.log('WARNING', fmt, *args, **kwargs)
  176. def warn(self, fmt, *args, **kwargs):
  177. self.warning(fmt, *args, **kwargs)
  178. def error(self, fmt, *args, **kwargs):
  179. self.log('ERROR', fmt, *args, **kwargs)
  180. def exception(self, fmt, *args, **kwargs):
  181. self.error(fmt, *args, **kwargs)
  182. sys.stderr.write(traceback.format_exc() + '\n')
  183. def critical(self, fmt, *args, **kwargs):
  184. self.log('CRITICAL', fmt, *args, **kwargs)
# Register a Logging instance under the name 'logging' in sys.modules and
# bind it to the module-level name ``logging`` used by the code in this file.
logging = sys.modules['logging'] = Logging('logging')
  186. class LRUCache(object):
  187. """http://pypi.python.org/pypi/lru/"""
  188. def __init__(self, max_items=100):
  189. self.cache = {}
  190. self.key_order = []
  191. self.max_items = max_items
  192. def __setitem__(self, key, value):
  193. self.cache[key] = value
  194. self._mark(key)
  195. def __getitem__(self, key):
  196. value = self.cache[key]
  197. self._mark(key)
  198. return value
  199. def _mark(self, key):
  200. if key in self.key_order:
  201. self.key_order.remove(key)
  202. self.key_order.insert(0, key)
  203. if len(self.key_order) > self.max_items:
  204. remove = self.key_order[self.max_items]
  205. del self.cache[remove]
  206. self.key_order.pop(self.max_items)
  207. def clear(self):
  208. self.cache = {}
  209. self.key_order = []
  210. class CertUtil(object):
  211. """CertUtil module, based on mitmproxy"""
  212. ca_vendor = 'GoAgent'
  213. ca_keyfile = 'CA.crt'
  214. ca_certdir = 'certs'
  215. ca_lock = threading.Lock()
  216. @staticmethod
  217. def create_ca():
  218. key = OpenSSL.crypto.PKey()
  219. key.generate_key(OpenSSL.crypto.TYPE_RSA, 2048)
  220. ca = OpenSSL.crypto.X509()
  221. ca.set_serial_number(0)
  222. ca.set_version(2)
  223. subj = ca.get_subject()
  224. subj.countryName = 'CN'
  225. subj.stateOrProvinceName = 'Internet'
  226. subj.localityName = 'Cernet'
  227. subj.organizationName = CertUtil.ca_vendor
  228. subj.organizationalUnitName = '%s Root' % CertUtil.ca_vendor
  229. subj.commonName = '%s CA' % CertUtil.ca_vendor
  230. ca.gmtime_adj_notBefore(0)
  231. ca.gmtime_adj_notAfter(24 * 60 * 60 * 3652)
  232. ca.set_issuer(ca.get_subject())
  233. ca.set_pubkey(key)
  234. ca.add_extensions([
  235. OpenSSL.crypto.X509Extension(b'basicConstraints', True, b'CA:TRUE'),
  236. OpenSSL.crypto.X509Extension(b'nsCertType', True, b'sslCA'),
  237. OpenSSL.crypto.X509Extension(b'extendedKeyUsage', True, b'serverAuth,clientAuth,emailProtection,timeStamping,msCodeInd,msCodeCom,msCTLSign,msSGC,msEFS,nsSGC'),
  238. OpenSSL.crypto.X509Extension(b'keyUsage', False, b'keyCertSign, cRLSign'),
  239. OpenSSL.crypto.X509Extension(b'subjectKeyIdentifier', False, b'hash', subject=ca), ])
  240. ca.sign(key, 'sha1')
  241. return key, ca
  242. @staticmethod
  243. def dump_ca():
  244. key, ca = CertUtil.create_ca()
  245. with open(CertUtil.ca_keyfile, 'wb') as fp:
  246. fp.write(OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, ca))
  247. fp.write(OpenSSL.crypto.dump_privatekey(OpenSSL.crypto.FILETYPE_PEM, key))
  248. @staticmethod
  249. def _get_cert(commonname, sans=()):
  250. with open(CertUtil.ca_keyfile, 'rb') as fp:
  251. content = fp.read()
  252. key = OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, content)
  253. ca = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, content)
  254. pkey = OpenSSL.crypto.PKey()
  255. pkey.generate_key(OpenSSL.crypto.TYPE_RSA, 2048)
  256. req = OpenSSL.crypto.X509Req()
  257. subj = req.get_subject()
  258. subj.countryName = 'CN'
  259. subj.stateOrProvinceName = 'Internet'
  260. subj.localityName = 'Cernet'
  261. subj.organizationalUnitName = '%s Branch' % CertUtil.ca_vendor
  262. if commonname[0] == '.':
  263. subj.commonName = '*' + commonname
  264. subj.organizationName = '*' + commonname
  265. sans = ['*'+commonname] + [x for x in sans if x != '*'+commonname]
  266. else:
  267. subj.commonName = commonname
  268. subj.organizationName = commonname
  269. sans = [commonname] + [x for x in sans if x != commonname]
  270. # GAEProxy Patch
  271. req.add_extensions([OpenSSL.crypto.X509Extension(b'subjectAltName', True, ', '.join('DNS: %s' % x for x in sans))])
  272. req.set_pubkey(pkey)
  273. req.sign(pkey, 'sha1')
  274. cert = OpenSSL.crypto.X509()
  275. # GAEProxy Patch
  276. cert.set_version(3)
  277. try:
  278. cert.set_serial_number(int(hashlib.md5(commonname.encode('utf-8')).hexdigest(), 16))
  279. except OpenSSL.SSL.Error:
  280. cert.set_serial_number(int(time.time()*1000))
  281. cert.gmtime_adj_notBefore(0)
  282. cert.gmtime_adj_notAfter(60 * 60 * 24 * 3652)
  283. cert.set_issuer(ca.get_subject())
  284. cert.set_subject(req.get_subject())
  285. cert.set_pubkey(req.get_pubkey())
  286. if commonname[0] == '.':
  287. sans = ['*'+commonname] + [s for s in sans if s != '*'+commonname]
  288. else:
  289. sans = [commonname] + [s for s in sans if s != commonname]
  290. # GAEProxy Patch
  291. cert.add_extensions([OpenSSL.crypto.X509Extension(b'subjectAltName', True, ', '.join('DNS: %s' % x for x in sans))])
  292. cert.sign(key, 'sha1')
  293. certfile = os.path.join(CertUtil.ca_certdir, commonname + '.crt')
  294. with open(certfile, 'wb') as fp:
  295. fp.write(OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, cert))
  296. fp.write(OpenSSL.crypto.dump_privatekey(OpenSSL.crypto.FILETYPE_PEM, pkey))
  297. return certfile
  298. @staticmethod
  299. def get_cert(commonname, sans=()):
  300. # GAEProxy Patch
  301. sans = ["*.akamaihd.net","*.fbcdn.net","*.google.com","*.appspot.com","*.googleapis.com","*.googlevideo.com","*.twitter.com","*.facebook.com","*.whatsapp.net"]
  302. if commonname.count('.') >= 2 and [len(x) for x in reversed(commonname.split('.'))] > [2, 4]:
  303. commonname = '.'+commonname.partition('.')[-1]
  304. certfile = os.path.join(CertUtil.ca_certdir, commonname + '.crt')
  305. if os.path.exists(certfile):
  306. return certfile
  307. elif OpenSSL is None:
  308. return CertUtil.ca_keyfile
  309. else:
  310. with CertUtil.ca_lock:
  311. if os.path.exists(certfile):
  312. return certfile
  313. return CertUtil._get_cert(commonname, sans)
  314. @staticmethod
  315. def import_ca(certfile):
  316. commonname = os.path.splitext(os.path.basename(certfile))[0]
  317. if OpenSSL:
  318. try:
  319. with open(certfile, 'rb') as fp:
  320. x509 = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, fp.read())
  321. commonname = next(v.decode() for k, v in x509.get_subject().get_components() if k == b'O')
  322. except Exception as e:
  323. logging.error('load_certificate(certfile=%r) failed:%s', certfile, e)
  324. # GAEProxy Patch
  325. return 0
  326. @staticmethod
  327. def check_ca():
  328. #Check CA exists
  329. capath = os.path.join(os.path.dirname(os.path.abspath(__file__)), CertUtil.ca_keyfile)
  330. certdir = os.path.join(os.path.dirname(os.path.abspath(__file__)), CertUtil.ca_certdir)
  331. if not os.path.exists(capath):
  332. if not OpenSSL:
  333. logging.critical('CA.key is not exist and OpenSSL is disabled, ABORT!')
  334. sys.exit(-1)
  335. if os.path.exists(certdir):
  336. if os.path.isdir(certdir):
  337. any(os.remove(x) for x in glob.glob(certdir+'/*.crt')+glob.glob(certdir+'/.*.crt'))
  338. else:
  339. os.remove(certdir)
  340. os.mkdir(certdir)
  341. CertUtil.dump_ca()
  342. if glob.glob('%s/*.key' % CertUtil.ca_certdir):
  343. for filename in glob.glob('%s/*.key' % CertUtil.ca_certdir):
  344. try:
  345. os.remove(filename)
  346. os.remove(os.path.splitext(filename)[0]+'.crt')
  347. except EnvironmentError:
  348. pass
  349. #Check CA imported
  350. if CertUtil.import_ca(capath) != 0:
  351. logging.warning('install root certificate failed, Please run as administrator/root/sudo')
  352. #Check Certs Dir
  353. if not os.path.exists(certdir):
  354. os.makedirs(certdir)
  355. class SSLConnection(object):
  356. has_gevent = socket.socket is getattr(sys.modules.get('gevent.socket'), 'socket', None)
  357. def __init__(self, context, sock):
  358. self._context = context
  359. self._sock = sock
  360. self._connection = OpenSSL.SSL.Connection(context, sock)
  361. self._makefile_refs = 0
  362. if self.has_gevent:
  363. self._wait_read = gevent.socket.wait_read
  364. self._wait_write = gevent.socket.wait_write
  365. self._wait_readwrite = gevent.socket.wait_readwrite
  366. else:
  367. self._wait_read = lambda fd,t: select.select([fd], [], [fd], t)
  368. self._wait_write = lambda fd,t: select.select([], [fd], [fd], t)
  369. self._wait_readwrite = lambda fd,t: select.select([fd], [fd], [fd], t)
  370. def __getattr__(self, attr):
  371. if attr not in ('_context', '_sock', '_connection', '_makefile_refs'):
  372. return getattr(self._connection, attr)
  373. def accept(self):
  374. sock, addr = self._sock.accept()
  375. client = OpenSSL.SSL.Connection(sock._context, sock)
  376. return client, addr
  377. def do_handshake(self):
  378. timeout = self._sock.gettimeout()
  379. while True:
  380. try:
  381. self._connection.do_handshake()
  382. break
  383. except (OpenSSL.SSL.WantReadError, OpenSSL.SSL.WantX509LookupError, OpenSSL.SSL.WantWriteError):
  384. sys.exc_clear()
  385. self._wait_readwrite(self._sock.fileno(), timeout)
  386. def connect(self, *args, **kwargs):
  387. timeout = self._sock.gettimeout()
  388. while True:
  389. try:
  390. self._connection.connect(*args, **kwargs)
  391. break
  392. except (OpenSSL.SSL.WantReadError, OpenSSL.SSL.WantX509LookupError):
  393. sys.exc_clear()
  394. self._wait_read(self._sock.fileno(), timeout)
  395. except OpenSSL.SSL.WantWriteError:
  396. sys.exc_clear()
  397. self._wait_write(self._sock.fileno(), timeout)
  398. def send(self, data, flags=0):
  399. timeout = self._sock.gettimeout()
  400. while True:
  401. try:
  402. self._connection.send(data, flags)
  403. break
  404. except (OpenSSL.SSL.WantReadError, OpenSSL.SSL.WantX509LookupError):
  405. sys.exc_clear()
  406. self._wait_read(self._sock.fileno(), timeout)
  407. except OpenSSL.SSL.WantWriteError:
  408. sys.exc_clear()
  409. self._wait_write(self._sock.fileno(), timeout)
  410. except OpenSSL.SSL.SysCallError as e:
  411. if e[0] == -1 and not data:
  412. # errors when writing empty strings are expected and can be ignored
  413. return 0
  414. raise
  415. def recv(self, bufsiz, flags=0):
  416. timeout = self._sock.gettimeout()
  417. pending = self._connection.pending()
  418. if pending:
  419. return self._connection.recv(min(pending, bufsiz))
  420. while True:
  421. try:
  422. return self._connection.recv(bufsiz, flags)
  423. except (OpenSSL.SSL.WantReadError, OpenSSL.SSL.WantX509LookupError):
  424. sys.exc_clear()
  425. self._wait_read(self._sock.fileno(), timeout)
  426. except OpenSSL.SSL.WantWriteError:
  427. sys.exc_clear()
  428. self._wait_write(self._sock.fileno(), timeout)
  429. except OpenSSL.SSL.ZeroReturnError:
  430. return ''
  431. def read(self, bufsiz, flags=0):
  432. return self.recv(bufsiz, flags)
  433. def write(self, buf, flags=0):
  434. return self.sendall(buf, flags)
  435. def close(self):
  436. if self._makefile_refs < 1:
  437. self._connection = None
  438. if self._sock:
  439. socket.socket.close(self._sock)
  440. else:
  441. self._makefile_refs -= 1
  442. def makefile(self, mode='r', bufsize=-1):
  443. self._makefile_refs += 1
  444. return socket._fileobject(self, mode, bufsize, close=True)
  445. class ProxyUtil(object):
  446. """ProxyUtil module, based on urllib2"""
  447. @staticmethod
  448. def parse_proxy(proxy):
  449. return urllib2._parse_proxy(proxy)
  450. @staticmethod
  451. def get_system_proxy():
  452. proxies = urllib2.getproxies()
  453. return proxies.get('https') or proxies.get('http') or {}
  454. @staticmethod
  455. def get_listen_ip():
  456. sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
  457. sock.connect(('8.8.8.8', 53))
  458. listen_ip = sock.getsockname()[0]
  459. sock.close()
  460. return listen_ip
  461. # GAEProxy Patch
  462. # No PAC
  463. def dns_remote_resolve(qname, dnsservers, blacklist, timeout):
  464. """
  465. http://gfwrev.blogspot.com/2009/11/gfwdns.html
  466. http://zh.wikipedia.org/wiki/域名服务器缓存污染
  467. http://support.microsoft.com/kb/241352
  468. """
  469. query = dnslib.DNSRecord(q=dnslib.DNSQuestion(qname))
  470. query_data = query.pack()
  471. dns_v4_servers = [x for x in dnsservers if ':' not in x]
  472. dns_v6_servers = [x for x in dnsservers if ':' in x]
  473. sock_v4 = sock_v6 = None
  474. socks = []
  475. if dns_v4_servers:
  476. sock_v4 = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
  477. socks.append(sock_v4)
  478. if dns_v6_servers:
  479. sock_v6 = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM)
  480. socks.append(sock_v6)
  481. timeout_at = time.time() + timeout
  482. try:
  483. for _ in xrange(2):
  484. try:
  485. for dnsserver in dns_v4_servers:
  486. sock_v4.sendto(query_data, (dnsserver, 53))
  487. for dnsserver in dns_v6_servers:
  488. sock_v6.sendto(query_data, (dnsserver, 53))
  489. while time.time() < timeout_at:
  490. ins, _, _ = select.select(socks, [], [], 0.1)
  491. for sock in ins:
  492. reply_data, _ = sock.recvfrom(512)
  493. reply = dnslib.DNSRecord.parse(reply_data)
  494. rtypes = (1, 28) if sock is sock_v6 else (1,)
  495. iplist = [str(x.rdata) for x in reply.rr if x.rtype in rtypes]
  496. if any(x in blacklist for x in iplist):
  497. logging.warning('query qname=%r reply bad iplist=%r', qname, iplist)
  498. else:
  499. logging.debug('query qname=%r reply iplist=%s', qname, iplist)
  500. return iplist
  501. except socket.error as e:
  502. logging.warning('handle dns query=%s socket: %r', query, e)
  503. finally:
  504. for sock in socks:
  505. sock.close()
  506. def get_dnsserver_list():
  507. if os.name == 'nt':
  508. import ctypes, ctypes.wintypes, struct, socket
  509. DNS_CONFIG_DNS_SERVER_LIST = 6
  510. buf = ctypes.create_string_buffer(2048)
  511. ctypes.windll.dnsapi.DnsQueryConfig(DNS_CONFIG_DNS_SERVER_LIST, 0, None, None, ctypes.byref(buf), ctypes.byref(ctypes.wintypes.DWORD(len(buf))))
  512. ips = struct.unpack('I', buf[0:4])[0]
  513. out = []
  514. for i in xrange(ips):
  515. start = (i+1) * 4
  516. out.append(socket.inet_ntoa(buf[start:start+4]))
  517. return out
  518. elif os.path.isfile('/etc/resolv.conf'):
  519. with open('/etc/resolv.conf', 'rb') as fp:
  520. return re.findall(r'(?m)^nameserver\s+(\S+)', fp.read())
  521. else:
  522. logging.warning("get_dnsserver_list failed: unsupport platform '%s-%s'", sys.platform, os.name)
  523. return []
  524. def spawn_later(seconds, target, *args, **kwargs):
  525. def wrap(*args, **kwargs):
  526. __import__('time').sleep(seconds)
  527. return target(*args, **kwargs)
  528. return __import__('thread').start_new_thread(wrap, args, kwargs)
  529. class HTTPUtil(object):
  530. """HTTP Request Class"""
  531. MessageClass = dict
  532. protocol_version = 'HTTP/1.1'
  533. skip_headers = frozenset(['Vary', 'Via', 'X-Forwarded-For', 'Proxy-Authorization', 'Proxy-Connection', 'Upgrade', 'X-Chrome-Variations', 'Connection', 'Cache-Control'])
  534. ssl_validate = False
  535. ssl_obfuscate = False
  536. ssl_ciphers = ':'.join(['ECDHE-ECDSA-AES256-SHA',
  537. 'ECDHE-RSA-AES256-SHA',
  538. 'DHE-RSA-CAMELLIA256-SHA',
  539. 'DHE-DSS-CAMELLIA256-SHA',
  540. 'DHE-RSA-AES256-SHA',
  541. 'DHE-DSS-AES256-SHA',
  542. 'ECDH-RSA-AES256-SHA',
  543. 'ECDH-ECDSA-AES256-SHA',
  544. 'CAMELLIA256-SHA',
  545. 'AES256-SHA',
  546. 'ECDHE-ECDSA-RC4-SHA',
  547. 'ECDHE-ECDSA-AES128-SHA',
  548. 'ECDHE-RSA-RC4-SHA',
  549. 'ECDHE-RSA-AES128-SHA',
  550. 'DHE-RSA-CAMELLIA128-SHA',
  551. 'DHE-DSS-CAMELLIA128-SHA',
  552. 'DHE-RSA-AES128-SHA',
  553. 'DHE-DSS-AES128-SHA',
  554. 'ECDH-RSA-RC4-SHA',
  555. 'ECDH-RSA-AES128-SHA',
  556. 'ECDH-ECDSA-RC4-SHA',
  557. 'ECDH-ECDSA-AES128-SHA',
  558. 'SEED-SHA',
  559. 'CAMELLIA128-SHA',
  560. 'RC4-SHA',
  561. 'RC4-MD5',
  562. 'AES128-SHA',
  563. 'ECDHE-ECDSA-DES-CBC3-SHA',
  564. 'ECDHE-RSA-DES-CBC3-SHA',
  565. 'EDH-RSA-DES-CBC3-SHA',
  566. 'EDH-DSS-DES-CBC3-SHA',
  567. 'ECDH-RSA-DES-CBC3-SHA',
  568. 'ECDH-ECDSA-DES-CBC3-SHA',
  569. 'DES-CBC3-SHA',
  570. 'TLS_EMPTY_RENEGOTIATION_INFO_SCSV'])
  571. def __init__(self, max_window=4, max_timeout=8, max_retry=4, proxy='', dns_servers=[], dns_blacklist=set()):
  572. # http://docs.python.org/dev/library/ssl.html
  573. # http://blog.ivanristic.com/2009/07/examples-of-the-information-collected-from-ssl-handshakes.html
  574. # http://src.chromium.org/svn/trunk/src/net/third_party/nss/ssl/sslenum.c
  575. # http://www.openssl.org/docs/apps/ciphers.html
  576. # openssl s_server -accept 443 -key CA.crt -cert CA.crt
  577. # set_ciphers as Modern Browsers
  578. self.max_window = max_window
  579. self.max_retry = max_retry
  580. self.max_timeout = max_timeout
  581. self.tcp_connection_time = collections.defaultdict(float)
  582. self.tcp_connection_cache = collections.defaultdict(Queue.PriorityQueue)
  583. self.ssl_connection_time = collections.defaultdict(float)
  584. self.ssl_connection_cache = collections.defaultdict(Queue.PriorityQueue)
  585. self.dns = {}
  586. self.proxy = proxy
  587. self.openssl_context = None
  588. if self.proxy:
  589. self.dns_resolve = self.__dns_resolve_withproxy
  590. self.create_connection = self.__create_connection_withproxy
  591. self.create_ssl_connection = self.__create_ssl_connection_withproxy
  592. self.dns_servers = dns_servers
  593. self.dns_blacklist = dns_blacklist
  594. def set_openssl_option(self, validate=True, obfuscate=True):
  595. if self.openssl_context is None:
  596. self.openssl_context = OpenSSL.SSL.Context(OpenSSL.SSL.TLSv1_METHOD)
  597. self.openssl_context.set_session_id(binascii.b2a_hex(os.urandom(10)))
  598. if hasattr(OpenSSL.SSL, 'SESS_CACHE_BOTH'):
  599. self.openssl_context.set_session_cache_mode(OpenSSL.SSL.SESS_CACHE_BOTH)
  600. if validate:
  601. self.openssl_context.load_verify_locations(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'cacert.pem'))
  602. self.openssl_context.set_verify(OpenSSL.SSL.VERIFY_PEER, lambda c, x, e, d, ok: ok)
  603. if obfuscate:
  604. ssl_ciphers = ':'.join(x for x in self.ssl_ciphers.split(':') if random.random() > 0.5)
  605. self.openssl_context.set_cipher_list(ssl_ciphers)
  606. def dns_resolve(self, host, dnsservers=[], ipv4_only=True):
  607. iplist = self.dns.get(host)
  608. if not iplist:
  609. if not dnsservers:
  610. iplist = list(set(socket.gethostbyname_ex(host)[-1]) - self.dns_blacklist)
  611. else:
  612. iplist = dns_remote_resolve(host, dnsservers, self.dns_blacklist, timeout=2)
  613. if not iplist:
  614. iplist = dns_remote_resolve(host, self.dns_servers, self.dns_blacklist, timeout=2)
  615. if ipv4_only:
  616. iplist = [ip for ip in iplist if re.match(r'\d+\.\d+\.\d+\.\d+', ip)]
  617. self.dns[host] = iplist = list(set(iplist))
  618. return iplist
    def __dns_resolve_withproxy(self, host, dnsservers=[], ipv4_only=True):
        """Return ``[host]`` unchanged; no lookup is performed.

        Installed as ``dns_resolve`` by ``__init__`` when a proxy is
        configured. ``dnsservers`` and ``ipv4_only`` are accepted for
        signature compatibility and ignored.
        """
        return [host]
  621. def create_connection(self, address, timeout=None, source_address=None, **kwargs):
  622. connection_cache_key = kwargs.get('cache_key')
  623. def _create_connection(ipaddr, timeout, queobj):
  624. sock = None
  625. try:
  626. # create a ipv4/ipv6 socket object
  627. sock = socket.socket(socket.AF_INET if ':' not in ipaddr[0] else socket.AF_INET6)
  628. # set reuseaddr option to avoid 10048 socket error
  629. sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
  630. # resize socket recv buffer 8K->32K to improve browser releated application performance
  631. sock.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, 32*1024)
  632. # disable nagle algorithm to send http request quickly.
  633. sock.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, True)
  634. # set a short timeout to trigger timeout retry more quickly.
  635. sock.settimeout(timeout or self.max_timeout)
  636. # start connection time record
  637. start_time = time.time()
  638. # TCP connect
  639. sock.connect(ipaddr)
  640. # record TCP connection time
  641. self.tcp_connection_time[ipaddr] = time.time() - start_time
  642. # put ssl socket object to output queobj
  643. queobj.put(sock)
  644. except (socket.error, OSError) as e:
  645. # any socket.error, put Excpetions to output queobj.
  646. queobj.put(e)
  647. # reset a large and random timeout to the ipaddr
  648. self.tcp_connection_time[ipaddr] = self.max_timeout+random.random()
  649. # close tcp socket
  650. if sock:
  651. sock.close()
  652. def _close_connection(count, queobj):
  653. for i in range(count):
  654. sock = queobj.get()
  655. if sock and not isinstance(sock, Exception):
  656. if connection_cache_key and i == 0:
  657. self.tcp_connection_cache[connection_cache_key].put((time.time(), sock))
  658. else:
  659. sock.close()
  660. try:
  661. while connection_cache_key:
  662. ctime, sock = self.tcp_connection_cache[connection_cache_key].get_nowait()
  663. if time.time() - ctime < 30:
  664. return sock
  665. except Queue.Empty:
  666. pass
  667. host, port = address
  668. result = None
  669. addresses = [(x, port) for x in self.dns_resolve(host)]
  670. if port == 443:
  671. get_connection_time = lambda addr: self.ssl_connection_time.__getitem__(addr) or self.tcp_connection_time.__getitem__(addr)
  672. else:
  673. get_connection_time = self.tcp_connection_time.__getitem__
  674. for i in range(self.max_retry):
  675. window = min((self.max_window+1)//2 + min(i, 1), len(addresses))
  676. addresses.sort(key=get_connection_time)
  677. addrs = addresses[:window] + random.sample(addresses, min(len(addresses), window, self.max_window-window))
  678. queobj = Queue.Queue()
  679. for addr in addrs:
  680. thread.start_new_thread(_create_connection, (addr, timeout, queobj))
  681. for i in range(len(addrs)):
  682. result = queobj.get()
  683. if not isinstance(result, (socket.error, OSError)):
  684. thread.start_new_thread(_close_connection, (len(addrs)-i-1, queobj))
  685. return result
  686. else:
  687. if i == 0:
  688. # only output first error
  689. logging.warning('create_connection to %s return %r, try again.', addrs, result)
  690. def create_ssl_connection(self, address, timeout=None, source_address=None, **kwargs):
  691. connection_cache_key = kwargs.get('cache_key')
  692. validate = kwargs.get('validate')
  693. def _create_ssl_connection(ipaddr, timeout, queobj):
  694. sock = None
  695. ssl_sock = None
  696. try:
  697. # create a ipv4/ipv6 socket object
  698. sock = socket.socket(socket.AF_INET if ':' not in ipaddr[0] else socket.AF_INET6)
  699. # set reuseaddr option to avoid 10048 socket error
  700. sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
  701. # resize socket recv buffer 8K->32K to improve browser releated application performance
  702. sock.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, 32*1024)
  703. # disable negal algorithm to send http request quickly.
  704. sock.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, True)
  705. # set a short timeout to trigger timeout retry more quickly.
  706. sock.settimeout(timeout or self.max_timeout)
  707. # pick up the certificate
  708. if not validate:
  709. ssl_sock = ssl.wrap_socket(sock, do_handshake_on_connect=False)
  710. else:
  711. ssl_sock = ssl.wrap_socket(sock, cert_reqs=ssl.CERT_REQUIRED, ca_certs=os.path.join(os.path.dirname(os.path.abspath(__file__)),'cacert.pem'), do_handshake_on_connect=False)
  712. ssl_sock.settimeout(timeout or self.max_timeout)
  713. # start connection time record
  714. start_time = time.time()
  715. # TCP connect
  716. ssl_sock.connect(ipaddr)
  717. connected_time = time.time()
  718. # SSL handshake
  719. ssl_sock.do_handshake()
  720. handshaked_time = time.time()
  721. # record TCP connection time
  722. self.tcp_connection_time[ipaddr] = ssl_sock.tcp_time = connected_time - start_time
  723. # record SSL connection time
  724. self.ssl_connection_time[ipaddr] = ssl_sock.ssl_time = handshaked_time - start_time
  725. ssl_sock.ssl_time = connected_time - start_time
  726. # sometimes, we want to use raw tcp socket directly(select/epoll), so setattr it to ssl socket.
  727. ssl_sock.sock = sock
  728. # verify SSL certificate.
  729. if validate and address[0].endswith('.appspot.com'):
  730. cert = ssl_sock.getpeercert()
  731. orgname = next((v for ((k, v),) in cert['subject'] if k == 'organizationName'))
  732. if not orgname.lower().startswith('google '):
  733. raise ssl.SSLError("%r certificate organizationName(%r) not startswith 'Google'" % (address[0], orgname))
  734. # put ssl socket object to output queobj
  735. queobj.put(ssl_sock)
  736. except (socket.error, ssl.SSLError, OSError) as e:
  737. # any socket.error, put Excpetions to output queobj.
  738. queobj.put(e)
  739. # reset a large and random timeout to the ipaddr
  740. self.ssl_connection_time[ipaddr] = self.max_timeout + random.random()
  741. # close ssl socket
  742. if ssl_sock:
  743. ssl_sock.close()
  744. # close tcp socket
  745. if sock:
  746. sock.close()
  747. def _create_openssl_connection(ipaddr, timeout, queobj):
  748. sock = None
  749. ssl_sock = None
  750. try:
  751. # create a ipv4/ipv6 socket object
  752. sock = socket.socket(socket.AF_INET if ':' not in ipaddr[0] else socket.AF_INET6)
  753. # set reuseaddr option to avoid 10048 socket error
  754. sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
  755. # resize socket recv buffer 8K->32K to improve browser releated application performance
  756. sock.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, 32*1024)
  757. # disable negal algorithm to send http request quickly.
  758. sock.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, True)
  759. # set a short timeout to trigger timeout retry more quickly.
  760. sock.settimeout(timeout or self.max_timeout)
  761. # pick up the certificate
  762. server_hostname = b'www.google.com' if address[0].endswith('.appspot.com') else None
  763. ssl_sock = SSLConnection(self.openssl_context, sock)
  764. ssl_sock.set_connect_state()
  765. if server_hostname:
  766. ssl_sock.set_tlsext_host_name(server_hostname)
  767. # start connection time record
  768. start_time = time.time()
  769. # TCP connect
  770. ssl_sock.connect(ipaddr)
  771. connected_time = time.time()
  772. # SSL handshake
  773. ssl_sock.do_handshake()
  774. handshaked_time = time.time()
  775. # record TCP connection time
  776. self.tcp_connection_time[ipaddr] = ssl_sock.tcp_time = connected_time - start_time
  777. # record SSL connection time
  778. self.ssl_connection_time[ipaddr] = ssl_sock.ssl_time = handshaked_time - start_time
  779. # sometimes, we want to use raw tcp socket directly(select/epoll), so setattr it to ssl socket.
  780. ssl_sock.sock = sock
  781. # verify SSL certificate.
  782. if validate and address[0].endswith('.appspot.com'):
  783. cert = ssl_sock.get_peer_certificate()
  784. commonname = next((v for k, v in cert.get_subject().get_components() if k == 'CN'))
  785. if '.google' not in commonname and not commonname.endswith('.appspot.com'):
  786. raise socket.error("Host name '%s' doesn't match certificate host '%s'" % (address[0], commonname))
  787. # put ssl socket object to output queobj
  788. queobj.put(ssl_sock)
  789. except (socket.error, OpenSSL.SSL.Error, OSError) as e:
  790. # any socket.error, put Excpetions to output queobj.
  791. queobj.put(e)
  792. # reset a large and random timeout to the ipaddr
  793. self.ssl_connection_time[ipaddr] = self.max_timeout + random.random()
  794. # close ssl socket
  795. if ssl_sock:
  796. ssl_sock.close()
  797. # close tcp socket
  798. if sock:
  799. sock.close()
  800. def _close_ssl_connection(count, queobj, first_tcp_time, first_ssl_time):
  801. for i in range(count):
  802. sock = queobj.get()
  803. ssl_time_threshold = min(1, 1.5 * first_ssl_time)
  804. if sock and not isinstance(sock, Exception):
  805. if connection_cache_key and sock.ssl_time < ssl_time_threshold:
  806. self.ssl_connection_cache[connection_cache_key].put((time.time(), sock))
  807. else:
  808. sock.close()
  809. try:
  810. while connection_cache_key:
  811. ctime, sock = self.ssl_connection_cache[connection_cache_key].get_nowait()
  812. if time.time() - ctime < 30:
  813. return sock
  814. except Queue.Empty:
  815. pass
  816. host, port = address
  817. result = None
  818. # create_connection = _create_ssl_connection if not validate else _create_openssl_connection
  819. create_connection = _create_ssl_connection
  820. addresses = [(x, port) for x in self.dns_resolve(host)]
  821. for i in range(self.max_retry):
  822. window = min((self.max_window+1)//2 + min(i, 1), len(addresses))
  823. addresses.sort(key=self.ssl_connection_time.__getitem__)
  824. addrs = addresses[:window] + random.sample(addresses, min(len(addresses), window, self.max_window-window))
  825. queobj = Queue.Queue()
  826. for addr in addrs:
  827. thread.start_new_thread(create_connection, (addr, timeout, queobj))
  828. for i in range(len(addrs)):
  829. result = queobj.get()
  830. if not isinstance(result, Exception):
  831. thread.start_new_thread(_close_ssl_connection, (len(addrs)-i-1, queobj, result.tcp_time, result.ssl_time))
  832. return result
  833. else:
  834. if i == 0:
  835. # only output first error
  836. logging.warning('create_ssl_connection to %s return %r, try again.', addrs, result)
  def __create_connection_withproxy(self, address, timeout=None, source_address=None, **kwargs):
      """Open a TCP tunnel to ``address`` through the configured HTTP proxy.

      Connects to the proxy parsed from ``self.proxy``, sends an HTTP
      ``CONNECT`` request and reads the proxy's reply.

      Args:
          address: ``(host, port)`` of the final destination.
          timeout: accepted for interface compatibility; not used here.
          source_address: accepted for interface compatibility; not used here.
          **kwargs: ignored.

      Returns:
          The connected socket to the proxy, or ``None`` when the proxy
          answers with an HTTP status >= 400.

      Raises:
          Any exception raised while connecting or talking to the proxy is
          logged and re-raised.
      """
      host, port = address
      logging.debug('__create_connection_withproxy connect (%r, %r)', host, port)
      _, proxyuser, proxypass, proxyaddress = ProxyUtil.parse_proxy(self.proxy)
      sock = None
      try:
          try:
              # Best effort: the result is discarded and failures are ignored.
              # Presumably this fills self.dns for the lookup below -- verify
              # against dns_resolve.
              self.dns_resolve(host)
          except (socket.error, OSError):
              pass
          proxyhost, _, proxyport = proxyaddress.rpartition(':')
          sock = socket.create_connection((proxyhost, int(proxyport)))
          if host in self.dns:
              hostname = random.choice(self.dns[host])
          elif host.endswith('.appspot.com'):
              hostname = 'www.google.com'
          else:
              hostname = host
          request_data = 'CONNECT %s:%s HTTP/1.1\r\n' % (hostname, port)
          if proxyuser and proxypass:
              request_data += 'Proxy-authorization: Basic %s\r\n' % base64.b64encode(('%s:%s' % (proxyuser, proxypass)).encode()).decode().strip()
          request_data += '\r\n'
          sock.sendall(request_data)
          response = httplib.HTTPResponse(sock)
          response.begin()
          if response.status >= 400:
              logging.error('__create_connection_withproxy return http error code %s', response.status)
              # Release the refused tunnel instead of just dropping the reference.
              sock.close()
              sock = None
          return sock
      except Exception as e:
          logging.error('__create_connection_withproxy error %s', e)
          # Do not leak the proxy socket when the handshake fails half-way.
          if sock is not None:
              sock.close()
          raise
  def __create_ssl_connection_withproxy(self, address, timeout=None, source_address=None, **kwargs):
      """Open an SSL connection to ``address`` tunnelled through the proxy.

      Args:
          address: ``(host, port)`` of the final destination.
          timeout: forwarded to ``__create_connection_withproxy``.
          source_address: forwarded to ``__create_connection_withproxy``.
          **kwargs: ignored.

      Returns:
          The SSL-wrapped socket; the underlying plain socket is attached to
          it as the ``sock`` attribute.

      Raises:
          socket.error: when the proxy refused the tunnel (no socket returned).
          Any other exception from the tunnel setup or the SSL wrap is logged
          and re-raised.
      """
      host, port = address
      logging.debug('__create_ssl_connection_withproxy connect (%r, %r)', host, port)
      sock = None
      try:
          sock = self.__create_connection_withproxy(address, timeout, source_address)
          if sock is None:
              # The tunnel helper returns None on an HTTP error status; fail
              # explicitly instead of handing None to ssl.wrap_socket.
              raise socket.error('proxy refused CONNECT to (%r, %r)' % (host, port))
          ssl_sock = ssl.wrap_socket(sock)
          ssl_sock.sock = sock
          return ssl_sock
      except Exception as e:
          logging.error('__create_ssl_connection_withproxy error %s', e)
          # Close the plain tunnel socket if wrapping it failed.
          if sock is not None:
              sock.close()
          raise
  def forward_socket(self, local, remote, timeout=60, tick=2, bufsize=8192, maxping=None, maxpong=None):
      """Relay bytes between ``local`` and ``remote`` until one side stops.

      A countdown starting at ``timeout`` is reduced by ``tick`` on every loop
      pass (independent of how long ``select`` really waited) and is reset
      whenever data is relayed: to ``maxpong or timeout`` for remote->local
      traffic and to ``maxping or timeout`` for local->remote traffic.

      The relay ends when the countdown runs out, when ``select`` reports an
      exceptional condition, or when either side returns an empty read.
      Both sockets are always closed on exit.

      Raises:
          NetWorkIOError: re-raised unless its first argument is one of
          ECONNABORTED, ECONNRESET, ENOTCONN or EPIPE.
      """
      watched = [local, remote]
      try:
          remaining = timeout
          while True:
              remaining -= tick
              if remaining <= 0:
                  break
              readable, _, broken = select.select(watched, [], watched, tick)
              if broken:
                  break
              for source in readable:
                  chunk = source.recv(bufsize)
                  if not chunk:
                      # Empty read: the peer closed its side.
                      return
                  if source is remote:
                      local.sendall(chunk)
                      remaining = maxpong or timeout
                  else:
                      remote.sendall(chunk)
                      remaining = maxping or timeout
      except NetWorkIOError as e:
          harmless = (errno.ECONNABORTED, errno.ECONNRESET, errno.ENOTCONN, errno.EPIPE)
          if e.args[0] not in harmless:
              raise
      finally:
          if local:
              local.close()
          if remote:
              remote.close()
  def green_forward_socket(self, local, remote, timeout=60, tick=2, bufsize=8192, maxping=None, maxpong=None, pongcallback=None, bitmask=None):
      """Relay bytes in both directions between ``local`` and ``remote``.

      One direction (local -> remote, on duplicated sockets) runs in a new
      thread; the other (remote -> local) runs in the calling thread.  When
      ``bitmask`` is truthy every byte is XOR-ed with it before being sent.
      ``tick``, ``maxping``, ``maxpong`` and ``pongcallback`` are accepted but
      not used by this implementation.

      Raises:
          NetWorkIOError: re-raised from the calling-thread copy unless its
          first argument is 'timed out' or one of the listed errno values.
      """
      def pump(sink, origin):
          # Copy origin -> sink until EOF; always close the outer sockets.
          try:
              for endpoint in (sink, origin):
                  endpoint.settimeout(timeout)
              while True:
                  chunk = origin.recv(bufsize)
                  if not chunk:
                      break
                  if bitmask:
                      chunk = ''.join(chr(ord(ch) ^ bitmask) for ch in chunk)
                  sink.sendall(chunk)
          except NetWorkIOError as e:
              harmless = ('timed out', errno.ECONNABORTED, errno.ECONNRESET, errno.EBADF, errno.EPIPE, errno.ENOTCONN, errno.ETIMEDOUT)
              if e.args[0] not in harmless:
                  raise
          finally:
              if local:
                  local.close()
              if remote:
                  remote.close()

      upstream_args = (remote.dup(), local.dup())
      thread.start_new_thread(pump, upstream_args)
      pump(local, remote)
  def _request(self, sock, method, path, protocol_version, headers, payload, bufsize=8192, crlf=None, return_sock=None):
      """Write one HTTP request to ``sock`` and return the parsed response.

      Args:
          sock: connected socket (plain or SSL) to write to and read from.
          method, path, protocol_version: components of the request line.
          headers: mapping of header name -> value; names are title-cased and
              those listed in ``self.skip_headers`` are dropped.
          payload: ``bytes`` sent together with the head, or an object with a
              ``read`` method streamed in ``bufsize`` chunks.
          bufsize: chunk size used when streaming a file-like payload.
          crlf: when truthy, a decoy ``GET /`` for a random host is sent first
              and its response is read and discarded.
          return_sock: when truthy, return ``sock`` instead of a response.

      Returns:
          ``sock`` if ``return_sock`` is truthy; otherwise an
          ``httplib.HTTPResponse`` whose headers have been read, or ``None``
          if the status line was bad or the decoy response could not be read.

      Raises:
          TypeError: if ``payload`` is neither ``bytes`` nor file-like
              (this includes ``None``).
      """
      skip_headers = self.skip_headers
      need_crlf = bool(crlf)
      if need_crlf:
          # Random host name built from alternating consonant/vowel picks.
          fakehost = 'www.' + ''.join(random.choice(('bcdfghjklmnpqrstvwxyz','aeiou')[x&1]) for x in xrange(random.randint(5,20))) + random.choice(['.net', '.com', '.org'])
          request_data = 'GET / HTTP/1.1\r\nHost: %s\r\n\r\n\r\n\r\r' % fakehost
      else:
          request_data = ''
      request_data += '%s %s %s\r\n' % (method, path, protocol_version)
      request_data += ''.join('%s: %s\r\n' % (k.title(), v) for k, v in headers.items() if k.title() not in skip_headers)
      if self.proxy:
          # Add proxy credentials only when both user name and password are set.
          _, username, password, _ = ProxyUtil.parse_proxy(self.proxy)
          if username and password:
              request_data += 'Proxy-Authorization: Basic %s\r\n' % base64.b64encode(('%s:%s' % (username, password)).encode()).decode().strip()
      request_data += '\r\n'
      if isinstance(payload, bytes):
          # Head and body go out in a single sendall call.
          sock.sendall(request_data.encode() + payload)
      elif hasattr(payload, 'read'):
          sock.sendall(request_data)
          while 1:
              data = payload.read(bufsize)
              if not data:
                  break
              sock.sendall(data)
      else:
          raise TypeError('http_util.request(payload) must be a string or buffer, not %r' % type(payload))
      if need_crlf:
          # Consume the response to the decoy request before the real one.
          try:
              response = httplib.HTTPResponse(sock)
              response.begin()
              response.read()
          except Exception:
              logging.exception('crlf skip read')
              return None
      if return_sock:
          return sock
      response = httplib.HTTPResponse(sock, buffering=True)
      try:
          response.begin()
      except httplib.BadStatusLine:
          # A bad status line is reported to the caller as "no response".
          response = None
      return response
  def request(self, method, url, payload=None, headers=None, realhost='', bufsize=8192, crlf=None, validate=None, return_sock=None, connection_cache_key=None):
      """Connect to the server named in ``url`` and issue one HTTP request.

      Args:
          method: HTTP method.
          url: absolute URL; scheme ``https`` selects an SSL connection.
          payload: request body, passed through to ``_request``.
          headers: header dict.  ``Host`` and, when there is a payload,
              ``Content-Length`` are added to it if missing (a dict supplied
              by the caller is modified in place).  Defaults to a fresh dict
              per call.
          realhost: when non-empty, connect to this host instead of the one
              in ``url``.
          bufsize, return_sock: passed through to ``_request``.
          crlf: passed through to ``_request``; forced to 0 for https.
          validate: passed to ``create_ssl_connection``.
          connection_cache_key: passed to the connection factory as
              ``cache_key``.

      Returns:
          Whatever ``_request`` returns.  ``None`` is returned implicitly if
          no attempt produced a usable plain socket and none raised.

      Raises:
          The exception of the last attempt once ``self.max_retry`` attempts
          have failed.
      """
      # A shared ``{}`` default would be mutated below and leak the Host and
      # Content-Length of one call into the next, so create it per call.
      if headers is None:
          headers = {}
      scheme, netloc, path, _, query, _ = urlparse.urlparse(url)
      if netloc.rfind(':') <= netloc.rfind(']'):
          # no port number (a ':' inside an IPv6 '[...]' literal does not count)
          host = netloc
          port = 443 if scheme == 'https' else 80
      else:
          host, _, port = netloc.rpartition(':')
          port = int(port)
      if query:
          path += '?' + query
      if 'Host' not in headers:
          headers['Host'] = host
      if payload and 'Content-Length' not in headers:
          headers['Content-Length'] = str(len(payload))
      for i in range(self.max_retry):
          sock = None
          ssl_sock = None
          try:
              if scheme == 'https':
                  ssl_sock = self.create_ssl_connection((realhost or host, port), self.max_timeout, validate=validate, cache_key=connection_cache_key)
                  if ssl_sock:
                      # Detach the raw socket reference carried on the SSL socket.
                      sock = ssl_sock.sock
                      del ssl_sock.sock
                  else:
                      raise socket.error('timed out', 'create_ssl_connection(%r,%r)' % (realhost or host, port))
              else:
                  sock = self.create_connection((realhost or host, port), self.max_timeout, cache_key=connection_cache_key)
              if sock:
                  if scheme == 'https':
                      crlf = 0
                  return self._request(ssl_sock or sock, method, path, self.protocol_version, headers, payload, bufsize=bufsize, crlf=crlf, return_sock=return_sock)
          except Exception as e:
              logging.debug('request "%s %s" failed:%s', method, url, e)
              if ssl_sock:
                  ssl_sock.close()
              if sock:
                  sock.close()
              if i == self.max_retry - 1:
                  raise
              else:
                  continue
  class Common(object):
      """Global Config Object

      Reads ``proxy.ini`` once in ``__init__`` and exposes every setting as an
      upper-case attribute named ``<SECTION>_<OPTION>``.
      """

      ENV_CONFIG_PREFIX = 'GOAGENT_'

      def __init__(self):
          """load config from proxy.ini"""
          # Replace the option regex on RawConfigParser (class-wide) so that only
          # '=' is accepted as the key/value delimiter.
          ConfigParser.RawConfigParser.OPTCRE = re.compile(r'(?P<option>[^=\s][^=]*)\s*(?P<vi>[=])\s*(?P<value>.*)$')
          self.CONFIG = ConfigParser.ConfigParser()
          # GAEProxy Patch
          self.CONFIG_FILENAME = '/data/data/org.gaeproxy/proxy.ini'
          self.CONFIG.read(self.CONFIG_FILENAME)

          self.LISTEN_IP = self.CONFIG.get('listen', 'ip')
          self.LISTEN_PORT = self.CONFIG.getint('listen', 'port')
          self.LISTEN_VISIBLE = self.CONFIG.getint('listen', 'visible')
          self.LISTEN_DEBUGINFO = self.CONFIG.getint('listen', 'debuginfo')

          # App ids are stored without the '.appspot.com' suffix.
          self.GAE_APPIDS = re.findall(r'[\w\-\.]+', self.CONFIG.get('gae', 'appid').replace('.appspot.com', ''))
          self.GAE_PASSWORD = self.CONFIG.get('gae', 'password').strip()
          self.GAE_PATH = self.CONFIG.get('gae', 'path')
          self.GAE_MODE = self.CONFIG.get('gae', 'mode')
          self.GAE_PROFILE = self.CONFIG.get('gae', 'profile').strip()
          self.GAE_WINDOW = self.CONFIG.getint('gae', 'window')
          self.GAE_VALIDATE = self.CONFIG.getint('gae', 'validate')
          self.GAE_OBFUSCATE = self.CONFIG.getint('gae', 'obfuscate')
          self.GAE_OPTIONS = self.CONFIG.get('gae', 'options')

          # The active profile selects the '<profile>/hosts' and '<profile>/http'
          # sections.  Keys of the hosts section are split by shape:
          #   no '\' and no ':'  -> HOSTS_* maps      (leading '.' = postfix rule)
          #   contains ':'       -> CONNECT_* maps    (leading '.' = postfix rule)
          #   contains '\'       -> regex rules in METHOD_REMATCH_MAP
          hosts_section, http_section = '%s/hosts' % self.GAE_PROFILE, '%s/http' % self.GAE_PROFILE
          self.HOSTS_MAP = collections.OrderedDict((k, v or k) for k, v in self.CONFIG.items(hosts_section) if '\\' not in k and ':' not in k and not k.startswith('.'))
          self.HOSTS_POSTFIX_MAP = collections.OrderedDict((k, v) for k, v in self.CONFIG.items(hosts_section) if '\\' not in k and ':' not in k and k.startswith('.'))
          self.HOSTS_POSTFIX_ENDSWITH = tuple(self.HOSTS_POSTFIX_MAP)
          self.CONNECT_HOSTS_MAP = collections.OrderedDict((k, v) for k, v in self.CONFIG.items(hosts_section) if ':' in k and not k.startswith('.'))
          self.CONNECT_POSTFIX_MAP = collections.OrderedDict((k, v) for k, v in self.CONFIG.items(hosts_section) if ':' in k and k.startswith('.'))
          self.CONNECT_POSTFIX_ENDSWITH = tuple(self.CONNECT_POSTFIX_MAP)
          self.METHOD_REMATCH_MAP = collections.OrderedDict((re.compile(k).match, v) for k, v in self.CONFIG.items(hosts_section) if '\\' in k)
          self.METHOD_REMATCH_HAS_LOCALFILE = any(x.startswith('file://') for x in self.METHOD_REMATCH_MAP.values())

          self.HTTP_WITHGAE = tuple(self.CONFIG.get(http_section, 'withgae').split('|'))
          self.HTTP_CRLFSITES = tuple(self.CONFIG.get(http_section, 'crlfsites').split('|'))
          self.HTTP_FORCEHTTPS = set(self.CONFIG.get(http_section, 'forcehttps').split('|'))
          self.HTTP_FAKEHTTPS = set(self.CONFIG.get(http_section, 'fakehttps').split('|'))
          self.HTTP_DNS = self.CONFIG.get(http_section, 'dns').split('|') if self.CONFIG.has_option(http_section, 'dns') else []
          # Host names that appear inside URL-shaped rule keys are added to the
          # fakehttps set.
          for hostname in [k for k, _ in self.CONFIG.items(hosts_section) if k.startswith(('http://', 'https://', 'https?://'))]:
              m = re.search(r'(?<=//)(\-|\_|\w|\\.)+(?=/)', hostname)
              if m:
                  host = m.group().replace('\\.', '.')
                  self.HTTP_FAKEHTTPS.add(host)
                  logging.info('add host=%r to fakehttps', host)

          self.IPLIST_MAP = collections.OrderedDict((k, v.split('|')) for k, v in self.CONFIG.items('iplist'))
          # A hosts entry mapped to itself becomes a one-element ip list.
          self.IPLIST_MAP.update((k, [k]) for k, v in self.HOSTS_MAP.items() if k == v)

          # GAEProxy Patch
          # No PAC

          self.PHP_ENABLE = self.CONFIG.getint('php', 'enable')
          self.PHP_LISTEN = self.CONFIG.get('php', 'listen')
          self.PHP_PASSWORD = self.CONFIG.get('php', 'password') if self.CONFIG.has_option('php', 'password') else ''
          self.PHP_CRLF = self.CONFIG.getint('php', 'crlf') if self.CONFIG.has_option('php', 'crlf') else 1
          self.PHP_VALIDATE = self.CONFIG.getint('php', 'validate') if self.CONFIG.has_option('php', 'validate') else 0
          self.PHP_FETCHSERVER = self.CONFIG.get('php', 'fetchserver')
          self.PHP_USEHOSTS = self.CONFIG.getint('php', 'usehosts')

          self.PROXY_ENABLE = self.CONFIG.getint('proxy', 'enable')
          self.PROXY_AUTODETECT = self.CONFIG.getint('proxy', 'autodetect') if self.CONFIG.has_option('proxy', 'autodetect') else 0
          self.PROXY_HOST = self.CONFIG.get('proxy', 'host')
          self.PROXY_PORT = self.CONFIG.getint('proxy', 'port')
          self.PROXY_USERNAME = self.CONFIG.get('proxy', 'username')
          # NOTE: the attribute name is misspelled (PASSWROD); kept because it is
          # a public attribute that other code may read.
          self.PROXY_PASSWROD = self.CONFIG.get('proxy', 'password')

          if not self.PROXY_ENABLE and self.PROXY_AUTODETECT:
              system_proxy = ProxyUtil.get_system_proxy()
              # Skip a system proxy that contains our own listen ip.
              if system_proxy and self.LISTEN_IP not in system_proxy:
                  _, username, password, address = ProxyUtil.parse_proxy(system_proxy)
                  proxyhost, _, proxyport = address.rpartition(':')
                  self.PROXY_ENABLE = 1
                  self.PROXY_USERNAME = username
                  self.PROXY_PASSWROD = password
                  self.PROXY_HOST = proxyhost
                  self.PROXY_PORT = int(proxyport)
          if self.PROXY_ENABLE:
              # An upstream proxy forces GAE mode to https.
              self.GAE_MODE = 'https'
              self.proxy = 'https://%s:%s@%s:%d' % (self.PROXY_USERNAME or '', self.PROXY_PASSWROD or '', self.PROXY_HOST, self.PROXY_PORT)
          else:
              self.proxy = ''

          self.AUTORANGE_HOSTS = self.CONFIG.get('autorange', 'hosts').split('|')
          self.AUTORANGE_HOSTS_MATCH = [re.compile(fnmatch.translate(h)).match for h in self.AUTORANGE_HOSTS]
          self.AUTORANGE_ENDSWITH = tuple(self.CONFIG.get('autorange', 'endswith').split('|'))
          self.AUTORANGE_NOENDSWITH = tuple(self.CONFIG.get('autorange', 'noendswith').split('|'))
          self.AUTORANGE_MAXSIZE = self.CONFIG.getint('autorange', 'maxsize')
          self.AUTORANGE_WAITSIZE = self.CONFIG.getint('autorange', 'waitsize')
          self.AUTORANGE_BUFSIZE = self.CONFIG.getint('autorange', 'bufsize')
          self.AUTORANGE_THREADS = self.CONFIG.getint('autorange', 'threads')

          self.FETCHMAX_LOCAL = self.CONFIG.getint('fetchmax', 'local') if self.CONFIG.get('fetchmax', 'local') else 3
          self.FETCHMAX_SERVER = self.CONFIG.get('fetchmax', 'server')

          self.DNS_ENABLE = self.CONFIG.getint('dns', 'enable')
          self.DNS_LISTEN = self.CONFIG.get('dns', 'listen')
          # The profile's own dns list, when present, wins over [dns] servers.
          self.DNS_SERVERS = self.HTTP_DNS or self.CONFIG.get('dns', 'servers').split('|')
          self.DNS_BLACKLIST = set(self.CONFIG.get('dns', 'blacklist').split('|'))

          self.USERAGENT_ENABLE = self.CONFIG.getint('useragent', 'enable')
          self.USERAGENT_STRING = self.CONFIG.get('useragent', 'string')

          self.LOVE_ENABLE = self.CONFIG.getint('love', 'enable')
          # GAEProxy Patch
          # Decode literal \uXXXX escapes in the configured tips.
          self.LOVE_TIP = [re.sub(r'\\u([0-9a-fA-F]{4})', lambda m:unichr(int(m.group(1), 16)), x) for x in self.CONFIG.get('love','tip').split('|')]

      def resolve_iplist(self):
          """Replace host names in ``self.IPLIST_MAP`` with resolved addresses.

          Entries that already consist only of IPv4 literals or values
          containing ':' are left untouched.  Every other entry is resolved
          against each server in ``self.DNS_SERVERS`` in parallel threads; if a
          result does not arrive within 2 seconds the local resolver is used
          instead.  The process exits with status -1 when an entry resolves to
          nothing.
          """
          def do_resolve(host, dnsservers, queue):
              # Always put exactly one result so the collector can count them.
              try:
                  iplist = dns_remote_resolve(host, dnsservers, self.DNS_BLACKLIST, timeout=2)
                  queue.put((host, dnsservers, iplist or []))
              except (socket.error, OSError) as e:
                  logging.error('resolve remote host=%r failed: %s', host, e)
                  queue.put((host, dnsservers, []))
          # https://support.google.com/websearch/answer/186669?hl=zh-Hans
          google_blacklist = ['216.239.32.20', '74.125.127.102', '74.125.155.102', '74.125.39.102', '74.125.39.113', '209.85.229.138']
          for name, need_resolve_hosts in list(self.IPLIST_MAP.items()):
              if all(re.match(r'\d+\.\d+\.\d+\.\d+', x) or ':' in x for x in need_resolve_hosts):
                  continue
              need_resolve_remote = [x for x in need_resolve_hosts if ':' not in x and not re.match(r'\d+\.\d+\.\d+\.\d+', x)]
              resolved_iplist = [x for x in need_resolve_hosts if x not in need_resolve_remote]
              result_queue = Queue.Queue()
              for host in need_resolve_remote:
                  for dnsserver in self.DNS_SERVERS:
                      logging.debug('resolve remote host=%r from dnsserver=%r', host, dnsserver)
                      threading._start_new_thread(do_resolve, (host, [dnsserver], result_queue))
              # One result is expected per (host, dns server) pair.
              for _ in range(len(self.DNS_SERVERS) * len(need_resolve_remote)):
                  try:
                      host, dnsservers, iplist = result_queue.get(timeout=2)
                      resolved_iplist += iplist or []
                      logging.debug('resolve remote host=%r from dnsservers=%s return iplist=%s', host, dnsservers, iplist)
                  except Queue.Empty:
                      logging.warning('resolve remote timeout, try resolve local')
                      resolved_iplist += sum([socket.gethostbyname_ex(x)[-1] for x in need_resolve_remote], [])
                      break
              if resolved_iplist and name.startswith('google_') and name not in ('google_cn', 'google_hk'):
                  # Keep only addresses sharing the first component of the first
                  # result.  An empty list skips this branch so that the
                  # "return empty" error below is reported instead of IndexError.
                  iplist_prefix = re.split(r'[\.:]', resolved_iplist[0])[0]
                  resolved_iplist = list(set(x for x in resolved_iplist if x.startswith(iplist_prefix)))
              else:
                  resolved_iplist = list(set(resolved_iplist))
              if name.startswith('google_'):
                  resolved_iplist = list(set(resolved_iplist) - set(google_blacklist))
              if len(resolved_iplist) == 0:
                  logging.error('resolve %s host return empty! please retry!', name)
                  sys.exit(-1)
              logging.info('resolve name=%s host to iplist=%r', name, resolved_iplist)
              # Store on this instance rather than on the module-level singleton.
              self.IPLIST_MAP[name] = resolved_iplist

      def info(self):
          """Return a multi-line, human-readable summary of the configuration."""
          info = ''
          info += '------------------------------------------------------\n'
          info += 'GoAgent Version : %s (python/%s %spyopenssl/%s)\n' % (__version__, sys.version[:5], gevent and 'gevent/%s ' % gevent.__version__ or '', getattr(OpenSSL, '__version__', 'Disabled'))
          info += 'Uvent Version : %s (pyuv/%s libuv/%s)\n' % (__import__('uvent').__version__, __import__('pyuv').__version__, __import__('pyuv').LIBUV_VERSION) if all(x in sys.modules for x in ('pyuv', 'uvent')) else ''
          info += 'Listen Address : %s:%d\n' % (self.LISTEN_IP, self.LISTEN_PORT)
          info += 'Local Proxy : %s:%s\n' % (self.PROXY_HOST, self.PROXY_PORT) if self.PROXY_ENABLE else ''
          info += 'Debug INFO : %s\n' % self.LISTEN_DEBUGINFO if self.LISTEN_DEBUGINFO else ''
          info += 'GAE Mode : %s\n' % self.GAE_MODE
          info += 'GAE Profile : %s\n' % self.GAE_PROFILE if self.GAE_PROFILE else ''
          info += 'GAE APPID : %s\n' % '|'.join(self.GAE_APPIDS)
          info += 'GAE Validate : %s\n' % self.GAE_VALIDATE if self.GAE_VALIDATE else ''
          info += 'GAE Obfuscate : %s\n' % self.GAE_OBFUSCATE if self.GAE_OBFUSCATE else ''
          # GAEProxy Patch
          # No PAC
          if self.PHP_ENABLE:
              info += 'PHP Listen : %s\n' % self.PHP_LISTEN
              info += 'PHP FetchServer : %s\n' % self.PHP_FETCHSERVER
          if self.DNS_ENABLE:
              info += 'DNS Listen : %s\n' % self.DNS_LISTEN
              info += 'DNS Servers : %s\n' % '|'.join(self.DNS_SERVERS)
          info += '------------------------------------------------------\n'
          return info
  # Module-level singletons, created at import time.
  # Instantiating Common reads and parses the configuration file immediately.
  common = Common()
  # Shared HTTP helper, configured from the values Common just loaded.
  http_util = HTTPUtil(max_window=common.GAE_WINDOW, proxy=common.proxy, dns_servers=common.DNS_SERVERS, dns_blacklist=common.DNS_BLACKLIST)
  def message_html(title, banner, detail=''):
      """Render the fixed HTML message page.

      ``title`` fills the <title> element, ``banner`` the <H1> heading and
      ``detail`` is inserted verbatim below the heading.  None of the values
      is HTML-escaped.
      """
      page_lines = (
          '',
          '<html><head>',
          '<meta http-equiv="content-type" content="text/html;charset=utf-8">',
          '<title>$title</title>',
          '<style><!--',
          'body {font-family: arial,sans-serif}',
          'div.nav {margin-top: 1ex}',
          'div.nav A {font-size: 10pt; font-family: arial,sans-serif}',
          'span.nav {font-size: 10pt; font-family: arial,sans-serif; font-weight: bold}',
          'div.nav A,span.big {font-size: 12pt; color: #0000cc}',
          'div.nav A {font-size: 10pt; color: black}',
          'A.l:link {color: #6f6f6f}',
          'A.u:link {color: green}',
          '//--></style>',
          '</head>',
          '<body text=#000000 bgcolor=#ffffff>',
          '<table border=0 cellpadding=2 cellspacing=0 width=100%>',
          '<tr><td bgcolor=#3366cc><font face=arial,sans-serif color=#ffffff><b>Message</b></td></tr>',
          '<tr><td> </td></tr></table>',
          '<blockquote>',
          '<H1>$banner</H1>',
          '$detail',
          '<p>',
          '</blockquote>',
          '<table width=100% cellpadding=0 cellspacing=0><tr><td bgcolor=#3366cc><img alt="" width=1 height=4></td></tr></table>',
          '</body></html>',
          '',
      )
      page = string.Template('\n'.join(page_lines))
      values = {'title': title, 'banner': banner, 'detail': detail}
      return page.substitute(values)
  try:
      from Crypto.Cipher.ARC4 import new as RC4Cipher
  except ImportError:
      logging.warning('Load Crypto.Cipher.ARC4 Failed, Use Pure Python Instead.')

      class RC4Cipher(object):
          """Pure-Python RC4 stream cipher used when PyCrypto is unavailable.

          The key stream position is kept between ``encrypt`` calls, so data
          may be fed in several pieces.
          """

          def __init__(self, key):
              """Run the RC4 key schedule over ``key`` (a non-empty string)."""
              x = 0
              # A real list is required: the permutation is shuffled in place.
              box = list(range(256))
              for i, y in enumerate(box):
                  x = (x + y + ord(key[i % len(key)])) & 0xff
                  box[i], box[x] = box[x], y
              self.__box = box
              self.__x = 0
              self.__y = 0

          def encrypt(self, data):
              """Return ``data`` XOR-ed with the next key stream bytes.

              RC4 is symmetric, so the same call also decrypts.
              """
              out = []
              out_append = out.append
              x = self.__x
              y = self.__y
              box = self.__box
              for char in data:
                  x = (x + 1) & 0xff
                  y = (y + box[x]) & 0xff
                  box[x], box[y] = box[y], box[x]
                  out_append(chr(ord(char) ^ box[(box[x] + box[y]) & 0xff]))
              # Remember the generator state for the next call.
              self.__x = x
              self.__y = y
              return ''.join(out)
  class RC4FileObject(object):
      """File-like wrapper that RC4-processes everything read from a stream.

      Attributes other than ``read`` are delegated to the wrapped stream.
      With a falsy ``key`` the data is passed through unchanged.
      """

      def __init__(self, stream, key):
          self.__stream = stream
          # None means "no key": read() then returns the data untouched.
          self.__cipher = RC4Cipher(key) if key else None

      def __getattr__(self, attr):
          # Only reached when normal lookup fails.  The private slots are
          # stored under their name-mangled form; refusing them here prevents
          # infinite recursion when __init__ has not run.
          if attr in ('__stream', '__cipher', '_RC4FileObject__stream', '_RC4FileObject__cipher'):
              raise AttributeError(attr)
          return getattr(self.__stream, attr)

      def read(self, size=-1):
          """Read up to ``size`` bytes from the stream and decrypt them."""
          data = self.__stream.read(size)
          if self.__cipher is None:
              return data
          return self.__cipher.encrypt(data)
  class XORCipher(object):
      """XOR Cipher Class

      XORs data with a repeating key.  The key position is kept between
      ``encrypt`` calls.  An empty key leaves the data unchanged.
      """

      def __init__(self, key):
          if not key:
              # Cycling over an empty key yields nothing, which would cut the
              # output short; with no key there is nothing to XOR with.
              self.__key_xor = lambda s: s
              return
          key_stream = itertools.cycle([ord(x) for x in key])
          # Use the builtin next() rather than the Python-2-only ``.next``.
          self.__key_gen = lambda: next(key_stream)
          self.__key_xor = lambda s: ''.join(chr(ord(x) ^ self.__key_gen()) for x in s)
          if len(key) == 1:
              # Single-byte keys can use the C implementation when available.
              try:
                  from Crypto.Util.strxor import strxor_c
                  c = ord(key)
                  self.__key_xor = lambda s: strxor_c(s, c)
              except ImportError:
                  sys.stderr.write('Load Crypto.Util.strxor Failed, Use Pure Python Instead.\n')

      def encrypt(self, data):
          """Return ``data`` XOR-ed with the key (the same call decrypts)."""
          return self.__key_xor(data)
  def gae_urlfetch(method, url, headers, payload, fetchserver, **kwargs):
      """Send one request through the GAE fetch server and return its response.

      The target request (method, url, headers and every truthy keyword
      argument) is packed into a metadata block, sent to ``fetchserver`` via
      ``http_util.request`` and the reply is unpacked into the returned
      response object.

      Args:
          method, url: the request to perform on the far side.
          headers: request headers; modified in place (``Host`` is removed,
              ``Content-Encoding`` / ``Content-Length`` may be set).
          payload: request body.
          fetchserver: URL of the fetch server.
          **kwargs: sent as ``G-<name>`` metadata lines; ``password`` is also
              used as the RC4 key.

      Returns:
          The response object with ``app_status`` / ``app_options`` set to the
          fetch server's own status and ``X-GOA-Options`` header.  When the
          fetch server answered 200, ``status`` and ``msg`` are replaced by the
          values decoded from the body; a truncated body yields status 502.
      """
      # deflate = lambda x:zlib.compress(x)[2:-4]
      # RC4 is applied only when a key is given; otherwise data passes through.
      rc4crypt = lambda s, k: RC4Cipher(k).encrypt(s) if k else s
      if payload:
          # Compress bodies under 10 MiB that are not already encoded, and keep
          # the compressed form only when it is actually smaller.
          if len(payload) < 10 * 1024 * 1024 and 'Content-Encoding' not in headers:
              zpayload = zlib.compress(payload)[2:-4]
              if len(zpayload) < len(payload):
                  payload = zpayload
                  headers['Content-Encoding'] = 'deflate'
          headers['Content-Length'] = str(len(payload))
      # GAE does not allow setting the `Host` header
      if 'Host' in headers:
          del headers['Host']
      metadata = 'G-Method:%s\nG-Url:%s\n%s' % (method, url, ''.join('G-%s:%s\n' % (k, v) for k, v in kwargs.items() if v))
      skip_headers = http_util.skip_headers
      metadata += ''.join('%s:%s\n' % (k.title(), v) for k, v in headers.items() if k not in skip_headers)
      # prepare GAE request
      request_method = 'POST'
      request_headers = {}
      if common.GAE_OBFUSCATE:
          # Obfuscated mode: the metadata travels base64-encoded in the Cookie
          # header and a request without body is sent as GET.
          if 'rc4' in common.GAE_OPTIONS:
              request_headers['X-GOA-Options'] = 'rc4'
              cookie = base64.b64encode(rc4crypt(zlib.compress(metadata)[2:-4], kwargs.get('password'))).strip()
              payload = rc4crypt(payload, kwargs.get('password'))
          else:
              cookie = base64.b64encode(zlib.compress(metadata)[2:-4]).strip()
          request_headers['Cookie'] = cookie
          if payload:
              request_headers['Content-Length'] = str(len(payload))
          else:
              request_method = 'GET'
      else:
          # Plain mode: body = 2-byte big-endian metadata length + compressed
          # metadata + payload.
          metadata = zlib.compress(metadata)[2:-4]
          payload = '%s%s%s' % (struct.pack('!h', len(metadata)), metadata, payload)
          if 'rc4' in common.GAE_OPTIONS:
              request_headers['X-GOA-Options'] = 'rc4'
              payload = rc4crypt(payload, kwargs.get('password'))
          request_headers['Content-Length'] = str(len(payload))
      # post data
      need_crlf = 0 if common.GAE_MODE == 'https' else 1
      need_validate = common.GAE_VALIDATE
      connection_cache_key = '%s:%d' % (common.HOSTS_POSTFIX_MAP['.appspot.com'], 443 if common.GAE_MODE == 'https' else 80)
      response = http_util.request(request_method, fetchserver, payload, request_headers, crlf=need_crlf, validate=need_validate, connection_cache_key=connection_cache_key)
      # NOTE(review): the attribute accesses below assume request() returned a
      # response object -- confirm whether it can return None here.
      response.app_status = response.status
      response.app_options = response.getheader('X-GOA-Options', '')
      if response.status != 200:
          return response
      # The body starts with two big-endian 16-bit values: the real status
      # code and the length of the header block that follows.
      data = response.read(4)
      if len(data) < 4:
          response.status = 502
          response.fp = io.BytesIO(b'connection aborted. too short leadtype data=' + data)
          response.read = response.fp.read
          return response
      response.status, headers_length = struct.unpack('!hh', data)
      data = response.read(headers_length)
      if len(data) < headers_length:
          response.status = 502
          response.fp = io.BytesIO(b'connection aborted. too short headers data=' + data)
          response.read = response.fp.read
          return response
      if 'rc4' not in response.app_options:
          response.msg = httplib.HTTPMessage(io.BytesIO(zlib.decompress(data, -zlib.MAX_WBITS)))
      else:
          response.msg = httplib.HTTPMessage(io.BytesIO(zlib.decompress(rc4crypt(data, kwargs.get('password')), -zlib.MAX_WBITS)))
          # The rest of the body is decrypted lazily as it is read.
          if kwargs.get('password') and response.fp:
              response.fp = RC4FileObject(response.fp, kwargs['password'])
      return response
  1323. class RangeFetch(object):
  1324. """Range Fetch Class"""
  1325. maxsize = 1024*1024*4
  1326. bufsize = 8192
  1327. threads = 1
  1328. waitsize = 1024*512
  1329. expect_begin = 0
  1330. def __init__(self, urlfetch, wfile, response, method, url, headers, payload, fetchservers, password, maxsize=0, bufsize=0, waitsize=0, threads=0):
  1331. self.urlfetch = urlfetch
  1332. self.wfile = wfile
  1333. self.response = response
  1334. self.command = method
  1335. self.url = url
  1336. self.headers = headers
  1337. self.payload = payload
  1338. self.fetchservers = fetchservers
  1339. self.password = password
  1340. self.maxsize = maxsize or self.__class__.maxsize
  1341. self.bufsize = bufsize or self.__class__.bufsize
  1342. self.waitsize = waitsize or self.__class__.bufsize
  1343. self.threads = threads or self.__class__.threads
  1344. self._stopped = None
  1345. self._last_app_status = {}
  def fetch(self):
      """Stream the whole entity to ``self.wfile`` using parallel range requests.

      The first response (``self.response``) supplies the ``Content-Range``
      from which the total length is taken.  The remaining byte ranges are
      queued in ``self.maxsize`` steps, ``self.threads`` fetch workers are
      scheduled, and the chunks they deliver are written out strictly in
      offset order.  ``self._stopped`` is set when the method finishes.
      """
      response_status = self.response.status
      response_headers = dict((k.title(), v) for k, v in self.response.getheaders())
      content_range = response_headers['Content-Range']
      #content_length = response_headers['Content-Length']
      start, end, length = tuple(int(x) for x in re.search(r'bytes (\d+)-(\d+)/(\d+)', content_range).group(1, 2, 3))
      if start == 0:
          # The client receives the complete entity: present it as a plain 200.
          response_status = 200
          response_headers['Content-Length'] = str(length)
          del response_headers['Content-Range']
      else:
          # Everything from ``start`` to the end of the entity is delivered, so
          # the advertised range must end at length-1 (not at the end of the
          # first chunk) to agree with Content-Length.
          response_headers['Content-Range'] = 'bytes %s-%s/%s' % (start, length-1, length)
          response_headers['Content-Length'] = str(length-start)

      logging.info('>>>>>>>>>>>>>>> RangeFetch started(%r) %d-%d', self.url, start, end)
      self.wfile.write(('HTTP/1.1 %s\r\n%s\r\n' % (response_status, ''.join('%s: %s\r\n' % (k, v) for k, v in response_headers.items()))))

      data_queue = Queue.PriorityQueue()
      range_queue = Queue.PriorityQueue()
      # The first range reuses the response that is already open.
      range_queue.put((start, end, self.response))
      for begin in range(end+1, length, self.maxsize):
          range_queue.put((begin, min(begin+self.maxsize-1, length-1), None))
      for i in xrange(0, self.threads):
          # Later workers are started with a delay proportional to their index.
          range_delay_size = i * self.maxsize
          spawn_later(float(range_delay_size)/self.waitsize, self.__fetchlet, range_queue, data_queue, range_delay_size)
      has_peek = hasattr(data_queue, 'peek')
      peek_timeout = 120
      self.expect_begin = start
      # NOTE(review): the bound stops at length-1; confirm a trailing 1-byte
      # chunk cannot be left unwritten.
      while self.expect_begin < length - 1:
          try:
              if has_peek:
                  # Look at the smallest queued offset without removing it.
                  begin, data = data_queue.peek(timeout=peek_timeout)
                  if self.expect_begin == begin:
                      data_queue.get()
                  elif self.expect_begin < begin:
                      # The chunk needed next has not arrived yet.
                      time.sleep(0.1)
                      continue
                  else:
                      logging.error('RangeFetch Error: begin(%r) < expect_begin(%r), quit.', begin, self.expect_begin)
                      break
              else:
                  # No peek available: take the item and put it back if it is
                  # not the one needed next.
                  begin, data = data_queue.get(timeout=peek_timeout)
                  if self.expect_begin == begin:
                      pass
                  elif self.expect_begin < begin:
                      data_queue.put((begin, data))
                      time.sleep(0.1)
                      continue
                  else:
                      logging.error('RangeFetch Error: begin(%r) < expect_begin(%r), quit.', begin, self.expect_begin)
                      break
          except Queue.Empty:
              logging.error('data_queue peek timeout, break')
              break
          try:
              self.wfile.write(data)
              self.expect_begin += len(data)
              del data
          except Exception as e:
              logging.info('RangeFetch client connection aborted(%s).', e)
              break
      self._stopped = True
    def __fetchlet(self, range_queue, data_queue, range_delay_size):
        """Worker loop: fetch byte ranges from range_queue and feed data_queue.

        Repeatedly takes a ``(start, end, response)`` item from
        ``range_queue``, fetches it through ``self.urlfetch`` unless the item
        already carries a response, and puts ``(offset, chunk)`` tuples on
        ``data_queue``. A range that fails or comes back incomplete is put
        back on ``range_queue`` so that it is retried. The loop returns once
        ``self._stopped`` is set.

        range_queue -- queue of (start, end, response-or-None) work items
        data_queue -- queue receiving (offset, data) chunks
        range_delay_size -- byte count added to the buffered-data estimate
            used by the throttle check below
        """
        # Work on a private, title-cased copy of the request headers.
        headers = dict((k.title(), v) for k, v in self.headers.items())
        headers['Connection'] = 'close'
        while 1:
            try:
                if self._stopped:
                    return
                try:
                    start, end, response = range_queue.get(timeout=1)
                    # Throttle: this range lies ahead of the offset the writer
                    # is waiting for and the estimated buffered size exceeds
                    # 30 MiB, so put the item back and wait before retrying.
                    if self.expect_begin < start and data_queue.qsize() * self.bufsize + range_delay_size > 30*1024*1024:
                        range_queue.put((start, end, response))
                        time.sleep(10)
                        continue
                    headers['Range'] = 'bytes=%d-%d' % (start, end)
                    fetchserver = ''
                    if not response:
                        # No response attached to the item: fetch the range
                        # from a randomly chosen fetch server.
                        fetchserver = random.choice(self.fetchservers)
                        # Pause first when the last status recorded for this
                        # server was a 5xx.
                        if self._last_app_status.get(fetchserver, 200) >= 500:
                            time.sleep(5)
                        response = self.urlfetch(self.command, self.url, headers, self.payload, fetchserver, password=self.password)
                except Queue.Empty:
                    # Nothing queued right now; loop to re-check _stopped.
                    continue
                except Exception as e:
                    logging.warning("Response %r in __fetchlet", e)
                    range_queue.put((start, end, None))
                    continue
                if not response:
                    logging.warning('RangeFetch %s return %r', headers['Range'], response)
                    range_queue.put((start, end, None))
                    continue
                if fetchserver:
                    # Remember this server's status for the 5xx pause above.
                    self._last_app_status[fetchserver] = response.app_status
                if response.app_status != 200:
                    logging.warning('Range Fetch "%s %s" %s return %s', self.command, self.url, headers['Range'], response.app_status)
                    response.close()
                    range_queue.put((start, end, None))
                    continue
                if response.getheader('Location'):
                    # Redirect: update the shared URL and retry the same range.
                    self.url = urlparse.urljoin(self.url, response.getheader('Location'))
                    logging.info('RangeFetch Redirect(%r)', self.url)
                    response.close()
                    range_queue.put((start, end, None))
                    continue
                if 200 <= response.status < 300:
                    content_range = response.getheader('Content-Range')
                    if not content_range:
                        # A 2xx answer without Content-Range is not accepted
                        # as a range response; requeue the range.
                        logging.warning('RangeFetch "%s %s" return Content-Range=%r: response headers=%r', self.command, self.url, content_range, response.getheaders())
                        response.close()
                        range_queue.put((start, end, None))
                        continue
                    content_length = int(response.getheader('Content-Length', 0))
                    logging.info('>>>>>>>>>>>>>>> [thread %s] %s %s', threading.currentThread().ident, content_length, content_range)
                    while 1:
                        try:
                            if self._stopped:
                                response.close()
                                return
                            data = response.read(self.bufsize)
                            if not data:
                                break
                            # Chunks are keyed by their absolute start offset.
                            data_queue.put((start, data))
                            start += len(data)
                        except Exception as e:
                            logging.warning('RangeFetch "%s %s" %s failed: %s', self.command, self.url, headers['Range'], e)
                            break
                    if start < end + 1:
                        # The body ended early: requeue only the missing tail.
                        logging.warning('RangeFetch "%s %s" retry %s-%s', self.command, self.url, start, end)
                        response.close()
                        range_queue.put((start, end, None))
                        continue
                    logging.info('>>>>>>>>>>>>>>> Successfully reached %d bytes.', start - 1)
                else:
                    logging.error('RangeFetch %r return %s', self.url, response.status)
                    response.close()
                    range_queue.put((start, end, None))
                    continue
            except Exception as e:
                # Anything not handled above is logged and re-raised, which
                # ends this worker.
                logging.exception('RangeFetch._fetchlet error:%s', e)
                raise
  1485. class LocalProxyServer(SocketServer.ThreadingTCPServer):
  1486. """Local Proxy Server"""
  1487. allow_reuse_address = True
  1488. def close_request(self, request):
  1489. try:
  1490. request.close()
  1491. except Exception:
  1492. pass
  1493. def finish_request(self, request, client_address):
  1494. try:
  1495. self.RequestHandlerClass(request, client_address, self)
  1496. except NetWorkIOError as e:
  1497. if e[0] not in (errno.ECONNABORTED, errno.ECONNRESET, errno.EPIPE):
  1498. raise
  1499. def handle_error(self, *args):
  1500. """make ThreadingTCPServer happy"""
  1501. exc_info = sys.exc_info()
  1502. error = exc_info and len(exc_info) and exc_info[1]
  1503. if isinstance(error, NetWorkIOError) and len(error.args) > 1 and 'bad write retry' in error.args[1]:
  1504. exc_info = error = None
  1505. else:
  1506. del exc_info, error
  1507. SocketServer.ThreadingTCPServer.handle_error(self, *args)
  1508. def expand_google_hk_iplist(domains, max_count=100):
  1509. iplist = sum([socket.gethostbyname_ex(x)[-1] for x in domains if not re.match(r'\d+\.\d+\.\d+\.\d+', x)], [])
  1510. cranges = set(x.rpartition('.')[0] for x in iplist)
  1511. need_expand = list(set(['%s.%d' % (c, i) for c in cranges for i in xrange(1, 254)]) - set(iplist))
  1512. random.shuffle(need_expand)
  1513. ip_connection_time = {}
  1514. for ip in need_expand:
  1515. if len(ip_connection_time) >= max_count:
  1516. break
  1517. sock = None
  1518. ssl_sock = None
  1519. try:
  1520. start_time = time.time()
  1521. request = urllib2.Request('https://%s/2' % ip, headers={'Host': 'goagent.appspot.com'})
  1522. urllib2.build_opener(urllib2.ProxyHandler({})).open(request)
  1523. ip_connection_time[(ip, 443)] = time.time() - start_time
  1524. except socket.error as e:
  1525. logging.debug('expand_google_hk_iplist(%s) error: %r', ip, e)
  1526. except urllib2.HTTPError as e:
  1527. if e.code == 404 and 'google' in e.headers.get('Server', '').lower():
  1528. logging.debug('expand_google_hk_iplist(%s) OK', ip)
  1529. ip_connection_time[(ip, 443)] = time.time() - start_time
  1530. else:
  1531. logging.debug('expand_google_hk_iplist(%s) error: %r', ip, e.code)
  1532. except urllib2.URLError as e:
  1533. logging.debug('expand_google_hk_iplist(%s) error: %r', ip, e)
  1534. except Exception as e:
  1535. logging.warn('expand_google_hk_iplist(%s) error: %r', ip, e)
  1536. finally:
  1537. if sock:
  1538. sock.close()
  1539. if ssl_sock:
  1540. ssl_sock.close()
  1541. time.sleep(2)
  1542. http_util.tcp_connection_time.update(ip_connection_time)
  1543. http_util.ssl_connection_time.update(ip_connection_time)
  1544. common.IPLIST_MAP['google_hk'] += [x[0] for x in ip_connection_time]
  1545. logging.info('expand_google_hk_iplist end. iplist=%s', ip_connection_time)
  1546. class GAEProxyHandler(BaseHTTPServer.BaseHTTPRequestHandler):
  1547. bufsize = 256*1024
  1548. first_run_lock = threading.Lock()
  1549. urlfetch = staticmethod(gae_urlfetch)
  1550. normcookie = functools.partial(re.compile(', ([^ =]+(?:=|$))').sub, '\\r\\nSet-Cookie: \\1')
  1551. normattachment = functools.partial(re.compile(r'filename=([^"\']+)').sub, 'filename="\\1"')
  1552. def first_run(self):
  1553. """GAEProxyHandler setup, init domain/iplist map"""
  1554. if common.GAE_VALIDATE or common.GAE_OBFUSCATE:
  1555. http_util.set_openssl_option(validate=common.GAE_VALIDATE, obfuscate=common.GAE_OBFUSCATE)
  1556. if not common.PROXY_ENABLE:
  1557. if 'google_hk' in common.IPLIST_MAP:
  1558. # threading._start_new_thread(expand_google_hk_iplist, (common.IPLIST_MAP['google_hk'][:], 16))
  1559. pass
  1560. logging.info('resolve common.IPLIST_MAP names=%s to iplist', list(common.IPLIST_MAP))
  1561. common.resolve_iplist()
  1562. if len(common.GAE_APPIDS) > 10:
  1563. random.shuffle(common.GAE_APPIDS)
  1564. for appid in common.GAE_APPIDS:
  1565. host = '%s.appspot.com' % appid
  1566. if host not in common.HOSTS_MAP:
  1567. common.HOSTS_MAP[host] = common.HOSTS_POSTFIX_MAP['.appspot.com']
  1568. if host not in http_util.dns:
  1569. http_util.dns[host] = common.IPLIST_MAP[common.HOSTS_MAP[host]]
  1570. def setup(self):
  1571. if isinstance(self.__class__.first_run, collections.Callable):
  1572. try:
  1573. with self.__class__.first_run_lock:
  1574. if isinstance(self.__class__.first_run, collections.Callable):
  1575. self.first_run()
  1576. self.__class__.first_run = None
  1577. except Exception as e:
  1578. logging.exception('GAEProxyHandler.first_run() return %r', e)
  1579. self.__class__.setup = BaseHTTPServer.BaseHTTPRequestHandler.setup
  1580. self.__class__.do_GET = self.__class__.do_METHOD
  1581. self.__class__.do_PUT = self.__class__.do_METHOD
  1582. self.__class__.do_POST = self.__class__.do_METHOD
  1583. self.__class__.do_HEAD = self.__class__.do_METHOD
  1584. self.__class__.do_DELETE = self.__class__.do_METHOD
  1585. self.__class__.do_OPTIONS = self.__class__.do_METHOD
  1586. self.setup()
  1587. def finish(self):
  1588. """make python2 BaseHTTPRequestHandler happy"""
  1589. try:
  1590. BaseHTTPServer.BaseHTTPRequestHandler.finish(self)
  1591. except NetWorkIOError as e:
  1592. if e[0] not in (errno.ECONNABORTED, errno.ECONNRESET, errno.EPIPE):
  1593. raise
  1594. def address_string(self):
  1595. return '%s:%s' % self.client_address[:2]
  1596. def do_METHOD(self):
  1597. if HAS_PYPY:
  1598. self.path = re.sub(r'(://[^/]+):\d+/', '\\1/', self.path)
  1599. host = self.headers.get('Host', '')
  1600. if self.path[0] == '/' and host:
  1601. self.path = 'http://%s%s' % (host, self.path)
  1602. elif not host and '://' in self.path:
  1603. host = urlparse.urlparse(self.path).netloc
  1604. self.url_parts = urlparse.urlparse(self.path)
  1605. if common.USERAGENT_ENABLE:
  1606. self.headers['User-Agent'] = common.USERAGENT_STRING
  1607. if host.endswith(common.HTTP_WITHGAE):
  1608. return self.do_METHOD_AGENT()
  1609. if host in common.HTTP_FORCEHTTPS and not self.headers.get('Referer', '').startswith('https://') and not self.path.startswith('https://'):
  1610. return self.wfile.write(('HTTP/1.1 301\r\nLocation: %s\r\n\r\n' % self.path.replace('http://', 'https://', 1)).encode())
  1611. if self.command not in ('GET', 'POST', 'HEAD', 'PUT', 'DELETE', 'PATCH'):
  1612. return self.do_METHOD_FWD()
  1613. if any(x(self.path) for x in common.METHOD_REMATCH_MAP) or host in common.HOSTS_MAP or host.endswith(common.HOSTS_POSTFIX_ENDSWITH):
  1614. return self.do_METHOD_FWD()
  1615. else:
  1616. return self.do_METHOD_AGENT()
  1617. def do_METHOD_FWD(self):
  1618. """Direct http forward"""
  1619. response = None
  1620. try:
  1621. content_length = int(self.headers.get('Content-Length', 0))
  1622. payload = self.rfile.read(content_length) if content_length else b''
  1623. host = self.url_parts.netloc
  1624. if any(x(self.path) for x in common.METHOD_REMATCH_MAP):
  1625. hostname = next(common.METHOD_REMATCH_MAP[x] for x in common.METHOD_REMATCH_MAP if x(self.path))
  1626. elif host in common.HOSTS_MAP:
  1627. hostname = common.HOSTS_MAP[host]
  1628. elif host.endswith(common.HOSTS_POSTFIX_ENDSWITH):
  1629. hostname = next(common.HOSTS_POSTFIX_MAP[x] for x in common.HOSTS_POSTFIX_MAP if host.endswith(x))
  1630. common.HOSTS_MAP[host] = hostname
  1631. else:
  1632. hostname = host
  1633. if common.METHOD_REMATCH_HAS_LOCALFILE and hostname.startswith('file://'):
  1634. filename = hostname.lstrip('file://')
  1635. if os.name == 'nt':
  1636. filename = filename.lstrip('/')
  1637. content_type = None
  1638. try:
  1639. import mimetypes
  1640. content_type = mimetypes.types_map.get(os.path.splitext(filename)[1])
  1641. except Exception as e:
  1642. logging.error('import mimetypes failed: %r', e)
  1643. try:
  1644. with open(filename, 'rb') as fp:
  1645. data = fp.read()
  1646. self.wfile.write('HTTP/1.1 200\r\n')
  1647. self.wfile.write('Connection: close\r\n')
  1648. self.wfile.write('Content-Length: %s\r\n' % len(data))
  1649. if content_type:
  1650. self.wfile.write('Content-Type: %s\r\n' % content_type)
  1651. self.wfile.write('\r\n')
  1652. self.wfile.write(data)
  1653. except Exception as e:
  1654. self.wfile.write('HTTP/1.1 403\r\n')
  1655. self.wfile.write('Connection: close\r\n')
  1656. self.wfile.write('\r\n')
  1657. self.wfile.write('open %r failed: %r' % (filename, e))
  1658. finally:
  1659. logging.info('%r matched local file %r, return', self.path, filename)
  1660. return
  1661. need_crlf = hostname.startswith('google_') or host.endswith(common.HTTP_CRLFSITES)
  1662. hostname = hostname or host
  1663. if hostname in common.IPLIST_MAP:
  1664. http_util.dns[host] = common.IPLIST_MAP[hostname]
  1665. else:
  1666. http_util.dns[host] = sum((http_util.dns_resolve(x) for x in hostname.split('|')), [])
  1667. validate = common.GAE_VALIDATE if host not in common.HTTP_FAKEHTTPS else None
  1668. connection_cache_key = hostname if host not in common.HTTP_FAKEHTTPS else None
  1669. response = http_util.request(self.command, self.path, payload, self.headers, crlf=need_crlf, validate=validate, connection_cache_key=connection_cache_key)
  1670. if not response:
  1671. return
  1672. logging.info('%s "FWD %s %s HTTP/1.1" %s %s', self.address_string(), self.command, self.path, response.status, response.getheader('Content-Length', '-'))
  1673. self.wfile.write(('HTTP/1.1 %s\r\n%s\r\n' % (response.status, ''.join('%s: %s\r\n' % (k.title(), v) for k, v in response.getheaders() if k.title() != 'Transfer-Encoding'))))
  1674. while True:
  1675. data = response.read(8192)
  1676. if not data:
  1677. break
  1678. self.wfile.write(data)
  1679. del data
  1680. response.close()
  1681. except NetWorkIOError as e:
  1682. if response:
  1683. response.close()
  1684. if e.args[0] in (errno.ECONNRESET, 10063, errno.ENAMETOOLONG):
  1685. logging.warn('http_util.request "%s %s" failed:%s, try addto `withgae`', self.command, self.path, e)
  1686. common.HTTP_WITHGAE.add(re.sub(r':\d+$', '', self.url_parts.netloc))
  1687. elif e.args[0] not in (errno.ECONNABORTED, errno.EPIPE):
  1688. raise
  1689. except Exception as e:
  1690. host = self.headers.get('Host', '')
  1691. logging.warn('GAEProxyHandler direct(%s) Error', host)
  1692. raise
  1693. def do_METHOD_AGENT(self):
  1694. """GAE http urlfetch"""
  1695. request_headers = dict((k.title(), v) for k, v in self.headers.items())
  1696. host = request_headers.get('Host', '')
  1697. path = self.url_parts.path
  1698. need_autorange = any(x(host) for x in common.AUTORANGE_HOSTS_MATCH) or path.endswith(common.AUTORANGE_ENDSWITH)
  1699. if path.endswith(common.AUTORANGE_NOENDSWITH) or 'range=' in self.url_parts.query or self.command == 'HEAD':
  1700. need_autorange = False
  1701. if self.command != 'HEAD' and 'Range' in request_headers:
  1702. m = re.search(r'bytes=(\d+)-', request_headers['Range'])
  1703. start = int(m.group(1) if m else 0)
  1704. request_headers['Range'] = 'bytes=%d-%d' % (start, start+common.AUTORANGE_MAXSIZE-1)
  1705. logging.info('autorange range=%r match url=%r', request_headers['Range'], self.path)
  1706. elif need_autorange:
  1707. logging.info('Found [autorange]endswith match url=%r', self.path)
  1708. m = re.search(r'bytes=(\d+)-', request_headers.get('Range', ''))
  1709. start = int(m.group(1) if m else 0)
  1710. request_headers['Range'] = 'bytes=%d-%d' % (start, start+common.AUTORANGE_MAXSIZE-1)
  1711. payload = b''
  1712. if 'Content-Length' in request_headers:
  1713. try:
  1714. payload = self.rfile.read(int(request_headers.get('Content-Length', 0)))
  1715. except NetWorkIOError as e:
  1716. logging.error('handle_method_urlfetch read payload failed:%s', e)
  1717. return
  1718. response = None
  1719. errors = []
  1720. headers_sent = False
  1721. get_fetchserver = lambda i: '%s://%s.appspot.com%s?' % (common.GAE_MODE, common.GAE_APPIDS[i] if i is not None else random.choice(common.GAE_APPIDS), common.GAE_PATH)
  1722. for retry in range(common.FETCHMAX_LOCAL):
  1723. fetchserver = get_fetchserver(0 if not need_autorange else None)
  1724. try:
  1725. content_length = 0
  1726. kwargs = {}
  1727. if common.GAE_PASSWORD:
  1728. kwargs['password'] = common.GAE_PASSWORD
  1729. if common.GAE_VALIDATE:
  1730. kwargs['validate'] = 1
  1731. response = self.urlfetch(self.command, self.path, request_headers, payload, fetchserver, **kwargs)
  1732. if not response and retry == common.FETCHMAX_LOCAL-1:
  1733. html = message_html('502 URLFetch failed', 'Local URLFetch %r failed' % self.path, str(errors))
  1734. self.wfile.write(b'HTTP/1.0 502\r\nContent-Type: text/html\r\n\r\n' + html.encode('utf-8'))
  1735. return
  1736. # gateway error, switch to https mode
  1737. if response.app_status in (400, 504):
  1738. common.GAE_MODE = 'https'
  1739. continue
  1740. # appid not exists, try remove it from appid
  1741. if response.app_status == 404:
  1742. if len(common.GAE_APPIDS) > 1:
  1743. appid = common.GAE_APPIDS.pop(0)
  1744. logging.warning('APPID %r not exists, remove it.', appid)
  1745. continue
  1746. else:
  1747. appid = common.GAE_APPIDS[0]
  1748. logging.error('APPID %r not exists, please ensure your appid in proxy.ini.', appid)
  1749. html = message_html('404 Appid Not Exists', 'Appid %r Not Exists' % appid, 'appid %r not exist, please edit your proxy.ini' % appid)
  1750. self.wfile.write(b'HTTP/1.0 502\r\nContent-Type: text/html\r\n\r\n' + html.encode('utf-8'))
  1751. return
  1752. # appid over qouta, switch to next appid
  1753. if response.app_status == 503:
  1754. if len(common.GAE_APPIDS) > 1:
  1755. common.GAE_APPIDS.pop(0)
  1756. logging.info('Current APPID Over Quota,Auto Switch to [%s], Retrying…' % (common.GAE_APPIDS[0]))
  1757. self.do_METHOD_AGENT()
  1758. return
  1759. else:
  1760. logging.error('All APPID Over Quota')
  1761. if response.app_status == 500 and need_autorange:
  1762. fetchserver = get_fetchserver(None)
  1763. logging.warning('500 with range in query, trying another fetchserver=%r', fetchserver)
  1764. continue
  1765. if response.app_status != 200 and retry == common.FETCHMAX_LOCAL-1:
  1766. logging.info('%s "GAE %s %s HTTP/1.1" %s -', self.address_string(), self.command, self.path, response.status)
  1767. self.wfile.write(('HTTP/1.1 %s\r\n%s\r\n' % (response.status, ''.join('%s: %s\r\n' % (k.title(), v) for k, v in response.getheaders() if k.title() != 'Transfer-Encoding'))))
  1768. self.wfile.write(response.read())
  1769. response.close()
  1770. return
  1771. # first response, has no retry.
  1772. if not headers_sent:
  1773. logging.info('%s "GAE %s %s HTTP/1.1" %s %s', self.address_string(), self.command, self.path, response.status, response.getheader('Content-Length', '-'))
  1774. if response.status == 206:
  1775. fetchservers = [get_fetchserver(i) for i in xrange(len(common.GAE_APPIDS))]
  1776. rangefetch = RangeFetch(gae_urlfetch, self.wfile, response, self.command, self.path, self.headers, payload, fetchservers, common.GAE_PASSWORD, maxsize=common.AUTORANGE_MAXSIZE, bufsize=common.AUTORANGE_BUFSIZE, waitsize=common.AUTORANGE_WAITSIZE, threads=common.AUTORANGE_THREADS)
  1777. return rangefetch.fetch()
  1778. if response.getheader('Set-Cookie'):
  1779. response.msg['Set-Cookie'] = self.normcookie(response.getheader('Set-Cookie'))
  1780. if response.getheader('Content-Disposition') and '"' not in response.getheader('Content-Disposition'):
  1781. response.msg['Content-Disposition'] = self.normattachment(response.getheader('Content-Disposition'))
  1782. headers_data = 'HTTP/1.1 %s\r\n%s\r\n' % (response.status, ''.join('%s: %s\r\n' % (k.title(), v) for k, v in response.getheaders() if k.title() != 'Transfer-Encoding'))
  1783. logging.debug('headers_data=%s', headers_data)
  1784. #self.wfile.write(headers_data.encode() if bytes is not str else headers_data)
  1785. self.wfile.write(headers_data)
  1786. headers_sent = True
  1787. content_length = int(response.getheader('Content-Length', 0))
  1788. content_range = response.getheader('Content-Range', '')
  1789. accept_ranges = response.getheader('Accept-Ranges', 'none')
  1790. if content_range:
  1791. start, end, length = tuple(int(x) for x in re.search(r'bytes (\d+)-(\d+)/(\d+)', content_range).group(1, 2, 3))
  1792. else:
  1793. start, end, length = 0, content_length-1, content_length
  1794. while True:
  1795. data = response.read(8192)
  1796. if not data:
  1797. response.close()
  1798. return
  1799. start += len(data)
  1800. self.wfile.write(data)
  1801. del data
  1802. if start >= end:
  1803. response.close()
  1804. return
  1805. except Exception as e:
  1806. errors.append(e)
  1807. if response:
  1808. response.close()
  1809. if e.args[0] in (0, errno.ECONNABORTED, errno.EPIPE):
  1810. logging.debug('GAEProxyHandler.do_METHOD_AGENT return %r', e)
  1811. elif e.args[0] in (errno.ECONNRESET, errno.ETIMEDOUT, errno.ENETUNREACH, 11004):
  1812. # connection reset or timeout, switch to https
  1813. common.GAE_MODE = 'https'
  1814. elif e.args[0] == errno.ETIMEDOUT or isinstance(e.args[0], str) and 'timed out' in e.args[0]:
  1815. if content_length and accept_ranges == 'bytes':
  1816. # we can retry range fetch here
  1817. logging.warn('GAEProxyHandler.do_METHOD_AGENT timed out, url=%r, content_length=%r, try again', self.path, content_length)
  1818. self.headers['Range'] = 'bytes=%d-%d' % (start, end)
  1819. elif isinstance(e, NetWorkIOError) and 'bad write retry' in e.args[-1]:
  1820. logging.info('GAEProxyHandler.do_METHOD_AGENT url=%r return %r, abort.', self.path, e)
  1821. return
  1822. else:
  1823. logging.exception('GAEProxyHandler.do_METHOD_AGENT %r return %r, try again', self.path, e)
  1824. def do_CONNECT(self):
  1825. """handle CONNECT cmmand, socket forward or deploy a fake cert"""
  1826. host, _, port = self.path.rpartition(':')
  1827. if host in common.HTTP_FAKEHTTPS or host.endswith(common.HTTP_WITHGAE):
  1828. return self.do_CONNECT_AGENT()
  1829. elif self.path in common.CONNECT_HOSTS_MAP or self.path.endswith(common.CONNECT_POSTFIX_ENDSWITH):
  1830. return self.do_CONNECT_FWD()
  1831. elif host in common.HOSTS_MAP or host.endswith(common.HOSTS_POSTFIX_ENDSWITH):
  1832. return self.do_CONNECT_FWD()
  1833. else:
  1834. return self.do_CONNECT_AGENT()
  1835. def do_CONNECT_FWD(self):
  1836. """socket forward for http CONNECT command"""
  1837. host, _, port = self.path.rpartition(':')
  1838. # GAEProxy Patch
  1839. domain = DNSCacheUtil.getHost(host)
  1840. if domain:
  1841. host = domain
  1842. port = int(port)
  1843. logging.info('%s "FWD %s %s:%d HTTP/1.1" - -', self.address_string(), self.command, host, port)
  1844. #http_headers = ''.join('%s: %s\r\n' % (k, v) for k, v in self.headers.items())
  1845. if not common.PROXY_ENABLE:
  1846. self.wfile.write(b'HTTP/1.1 200 OK\r\n\r\n')
  1847. data = self.connection.recv(1024)
  1848. for i in range(5):
  1849. try:
  1850. if self.path in common.CONNECT_HOSTS_MAP:
  1851. hostname = common.CONNECT_HOSTS_MAP[self.path]
  1852. elif self.path.endswith(common.CONNECT_POSTFIX_ENDSWITH):
  1853. hostname = next(common.CONNECT_POSTFIX_MAP[x] for x in common.CONNECT_POSTFIX_MAP if self.path.endswith(x))
  1854. common.CONNECT_HOSTS_MAP[self.path] = hostname
  1855. elif host in common.HOSTS_MAP:
  1856. hostname = common.HOSTS_MAP[host]
  1857. elif host.endswith(common.HOSTS_POSTFIX_ENDSWITH):
  1858. hostname = next(common.HOSTS_POSTFIX_MAP[x] for x in common.HOSTS_POSTFIX_MAP if host.endswith(x))
  1859. common.HOSTS_MAP[host] = hostname
  1860. else:
  1861. hostname = host
  1862. hostname = hostname or host
  1863. if hostname in common.IPLIST_MAP:
  1864. http_util.dns[host] = common.IPLIST_MAP[hostname]
  1865. else:
  1866. http_util.dns[host] = sum((http_util.dns_resolve(x) for x in hostname.split('|')), [])
  1867. #connection_cache_key = '%s:%d' % (hostname or host, port)
  1868. connection_cache_key = None
  1869. timeout = 4
  1870. remote = http_util.create_connection((host, port), timeout, cache_key=connection_cache_key)
  1871. if remote is not None and data:
  1872. remote.sendall(data)
  1873. break
  1874. elif i == 0:
  1875. # only logging first create_connection error
  1876. logging.error('http_util.create_connection((host=%r, port=%r), %r) timeout', host, port, timeout)
  1877. except NetWorkIOError as e:
  1878. if e.args[0] == 9:
  1879. logging.error('GAEProxyHandler direct forward remote (%r, %r) failed', host, port)
  1880. continue
  1881. else:
  1882. raise
  1883. if hasattr(remote, 'fileno'):
  1884. # reset timeout default to avoid long http upload failure, but it will delay timeout retry :(
  1885. remote.settimeout(None)
  1886. http_util.forward_socket(self.connection, remote, bufsize=self.bufsize)
  1887. else:
  1888. hostip = random.choice(http_util.dns_resolve(host))
  1889. remote = http_util.create_connection((hostip, int(port)), timeout=4)
  1890. if not remote:
  1891. logging.error('GAEProxyHandler proxy connect remote (%r, %r) failed', host, port)
  1892. return
  1893. self.wfile.write(b'HTTP/1.1 200 OK\r\n\r\n')
  1894. http_util.forward_socket(self.connection, remote, bufsize=self.bufsize)
  1895. def do_CONNECT_AGENT(self):
  1896. """deploy fake cert to client"""
  1897. host, _, port = self.path.rpartition(':')
  1898. # GAEProxy Patch
  1899. domain = DNSCacheUtil.getHost(host)
  1900. if domain:
  1901. host = domain
  1902. port = int(port)
  1903. certfile = CertUtil.get_cert(host)
  1904. logging.info('%s "AGENT %s %s:%d HTTP/1.1" - -', self.address_string(), self.command, host, port)
  1905. self.__realconnection = None
  1906. self.wfile.write(b'HTTP/1.1 200 OK\r\n\r\n')
  1907. try:
  1908. ssl_sock = ssl.wrap_socket(self.connection, keyfile=certfile, certfile=certfile, server_side=True)
  1909. # if not http_util.ssl_validate and not http_util.ssl_obfuscate:
  1910. # ssl_sock = ssl.wrap_socket(self.connection, keyfile=certfile, certfile=certfile, server_side=True)
  1911. # else:
  1912. # ssl_context = OpenSSL.SSL.Context(OpenSSL.SSL.TLSv1_METHOD)
  1913. # ssl_context.use_privatekey_file(certfile)
  1914. # ssl_context.use_certificate_file(certfile)
  1915. # ssl_sock = SSLConnection(ssl_context, self.connection)
  1916. # ssl_sock.set_accept_state()
  1917. # ssl_sock.do_handshake()
  1918. except Exception as e:
  1919. if e.args[0] not in (errno.ECONNABORTED, errno.ECONNRESET):
  1920. logging.exception('ssl.wrap_socket(self.connection=%r) failed: %s', self.connection, e)
  1921. return
  1922. self.__realconnection = self.connection
  1923. self.__realwfile = self.wfile
  1924. self.__realrfile = self.rfile
  1925. self.connection = ssl_sock
  1926. self.rfile = self.connection.makefile('rb', self.bufsize)
  1927. self.wfile = self.connection.makefile('wb', 0)
  1928. try:
  1929. self.raw_requestline = self.rfile.readline(65537)
  1930. if len(self.raw_requestline) > 65536:
  1931. self.requestline = ''
  1932. self.request_version = ''
  1933. self.command = ''
  1934. self.send_error(414)
  1935. return
  1936. if not self.raw_requestline:
  1937. self.close_connection = 1
  1938. return
  1939. if not self.parse_request():
  1940. return
  1941. except NetWorkIOError as e:
  1942. if e.args[0] not in (errno.ECONNABORTED, errno.ECONNRESET, errno.EPIPE):
  1943. raise
  1944. if self.path[0] == '/' and host:
  1945. self.path = 'https://%s%s' % (self.headers['Host'], self.path)
  1946. try:
  1947. self.do_METHOD()
  1948. except NetWorkIOError as e:
  1949. if e.args[0] not in (errno.ECONNABORTED, errno.ETIMEDOUT, errno.EPIPE):
  1950. raise
  1951. finally:
  1952. if self.__realconnection:
  1953. try:
  1954. self.__realconnection.shutdown(socket.SHUT_WR)
  1955. self.__realconnection.close()
  1956. except NetWorkIOError:
  1957. pass
  1958. finally:
  1959. self.__realconnection = None
  1960. def php_urlfetch(method, url, headers, payload, fetchserver, **kwargs):
  1961. if payload:
  1962. if len(payload) < 10 * 1024 * 1024 and 'Content-Encoding' not in headers:
  1963. zpayload = zlib.compress(payload)[2:-4]
  1964. if len(zpayload) < len(payload):
  1965. payload = zpayload
  1966. headers['Content-Encoding'] = 'deflate'
  1967. headers['Content-Length'] = str(len(payload))
  1968. skip_headers = http_util.skip_headers
  1969. if common.PHP_VALIDATE:
  1970. kwargs['validate'] = 1
  1971. metadata = 'G-Method:%s\nG-Url:%s\n%s%s' % (method, url, ''.join('G-%s:%s\n' % (k, v) for k, v in kwargs.items() if v), ''.join('%s:%s\n' % (k, v) for k, v in headers.items() if k not in skip_headers))
  1972. metadata = zlib.compress(metadata)[2:-4]
  1973. app_payload = b''.join((struct.pack('!h', len(metadata)), metadata, payload))
  1974. app_headers = {'Content-Length': len(app_payload), 'Content-Type': 'application/octet-stream'}
  1975. fetchserver += '?%s' % random.random()
  1976. crlf = 0 if fetchserver.startswith('https') else common.PHP_CRLF
  1977. connection_cache_key = '%s//:%s' % urlparse.urlparse(fetchserver)[:2]
  1978. response = http_util.request('POST', fetchserver, app_payload, app_headers, crlf=crlf, connection_cache_key=connection_cache_key)
  1979. if not response:
  1980. raise socket.error(errno.ECONNRESET, 'urlfetch %r return None' % url)
  1981. response.app_status = response.status
  1982. if response.status != 200:
  1983. if response.status in (400, 405):
  1984. # filter by some firewall
  1985. common.PHP_CRLF = 0
  1986. return response
  1987. return response
  1988. class PHPProxyHandler(GAEProxyHandler):
  1989. urlfetch = staticmethod(php_urlfetch)
  1990. first_run_lock = threading.Lock()
  1991. def first_run(self):
  1992. if not common.PROXY_ENABLE:
  1993. common.resolve_iplist()
  1994. fetchhost = re.sub(r':\d+$', '', urlparse.urlparse(common.PHP_FETCHSERVER).netloc)
  1995. logging.info('resolve common.PHP_FETCHSERVER domain=%r to iplist', fetchhost)
  1996. if common.PHP_USEHOSTS and fetchhost in common.HOSTS_MAP:
  1997. hostname = common.HOSTS_MAP[fetchhost]
  1998. fetchhost_iplist = sum([socket.gethostbyname_ex(x)[-1] for x in common.IPLIST_MAP.get(hostname) or hostname.split('|')], [])
  1999. else:
  2000. fetchhost_iplist = http_util.dns_resolve(fetchhost)
  2001. if len(fetchhost_iplist) == 0:
  2002. logging.error('resolve %r domain return empty! please use ip list to replace domain list!', fetchhost)
  2003. sys.exit(-1)
  2004. http_util.dns[fetchhost] = list(set(fetchhost_iplist))
  2005. logging.info('resolve common.PHP_FETCHSERVER domain to iplist=%r', fetchhost_iplist)
  2006. return True
  2007. def setup(self):
  2008. if isinstance(self.__class__.first_run, collections.Callable):
  2009. try:
  2010. with self.__class__.first_run_lock:
  2011. if isinstance(self.__class__.first_run, collections.Callable):
  2012. self.first_run()
  2013. self.__class__.first_run = None
  2014. except NetWorkIOError as e:
  2015. logging.error('PHPProxyHandler.first_run() return %r', e)
  2016. except Exception as e:
  2017. logging.exception('PHPProxyHandler.first_run() return %r', e)
  2018. self.__class__.setup = BaseHTTPServer.BaseHTTPRequestHandler.setup
  2019. if common.PHP_USEHOSTS:
  2020. self.__class__.do_GET = self.__class__.do_METHOD
  2021. self.__class__.do_PUT = self.__class__.do_METHOD
  2022. self.__class__.do_POST = self.__class__.do_METHOD
  2023. self.__class__.do_HEAD = self.__class__.do_METHOD
  2024. self.__class__.do_DELETE = self.__class__.do_METHOD
  2025. self.__class__.do_OPTIONS = self.__class__.do_METHOD
  2026. self.__class__.do_CONNECT = GAEProxyHandler.do_CONNECT
  2027. else:
  2028. self.__class__.do_GET = self.__class__.do_METHOD_AGENT
  2029. self.__class__.do_PUT = self.__class__.do_METHOD_AGENT
  2030. self.__class__.do_POST = self.__class__.do_METHOD_AGENT
  2031. self.__class__.do_HEAD = self.__class__.do_METHOD_AGENT
  2032. self.__class__.do_DELETE = self.__class__.do_METHOD_AGENT
  2033. self.__class__.do_OPTIONS = self.__class__.do_METHOD_AGENT
  2034. self.__class__.do_CONNECT = GAEProxyHandler.do_CONNECT_AGENT
  2035. self.setup()
  2036. def do_METHOD_AGENT(self):
  2037. response = None
  2038. try:
  2039. headers = dict((k.title(), v) for k, v in self.headers.items())
  2040. payload = b''
  2041. if 'Content-Length' in headers:
  2042. try:
  2043. payload = self.rfile.read(int(headers.get('Content-Length', 0)))
  2044. except NetWorkIOError as e:
  2045. logging.error('handle_method read payload failed:%s', e)
  2046. return
  2047. errors = []
  2048. for _ in range(common.FETCHMAX_LOCAL):
  2049. try:
  2050. kwargs = {}
  2051. if common.PHP_PASSWORD:
  2052. kwargs['password'] = common.PHP_PASSWORD
  2053. if common.PHP_VALIDATE:
  2054. kwargs['validate'] = 1
  2055. response = self.urlfetch(self.command, self.path, headers, payload, common.PHP_FETCHSERVER, **kwargs)
  2056. if response:
  2057. break
  2058. except Exception as e:
  2059. errors.append(e)
  2060. if response is None:
  2061. html = message_html('502 php URLFetch failed', 'Local php URLFetch %r failed' % self.path, str(errors))
  2062. self.wfile.write(b'HTTP/1.0 502\r\nContent-Type: text/html\r\n\r\n' + html.encode('utf-8'))
  2063. return
  2064. logging.info('%s "PHP %s %s HTTP/1.1" %s -', self.address_string(), self.command, self.path, response.status)
  2065. if response.status != 200:
  2066. self.wfile.write('HTTP/1.1 %s\r\n%s\r\n' % (response.status, ''.join('%s: %s\r\n' % (k, v) for k, v in response.getheaders())))
  2067. cipher = response.status == 200 and response.getheader('Content-Type', '') == 'image/gif' and XORCipher(common.PHP_PASSWORD[0])
  2068. while True:
  2069. data = response.read(8192)
  2070. if not data:
  2071. response.close()
  2072. break
  2073. if cipher:
  2074. data = cipher.encrypt(data)
  2075. self.wfile.write(data)
  2076. del data
  2077. except NetWorkIOError as e:
  2078. # Connection closed before proxy return
  2079. if response:
  2080. response.close()
  2081. if e.args[0] not in (errno.ECONNABORTED, errno.EPIPE):
  2082. raise
  2083. def get_uptime():
  2084. if os.name == 'nt':
  2085. import ctypes
  2086. try:
  2087. tick = ctypes.windll.kernel32.GetTickCount64()
  2088. except AttributeError:
  2089. tick = ctypes.windll.kernel32.GetTickCount()
  2090. return tick / 1000.0
  2091. elif os.path.isfile('/proc/uptime'):
  2092. with open('/proc/uptime', 'rb') as fp:
  2093. uptime = fp.readline().strip().split()[0].strip()
  2094. return float(uptime)
  2095. elif any(os.path.isfile(os.path.join(x, 'uptime')) for x in os.environ['PATH'].split(os.pathsep)):
  2096. # http://www.opensource.apple.com/source/lldb/lldb-69/test/pexpect-2.4/examples/uptime.py
  2097. pattern = r'up\s+(.*?),\s+([0-9]+) users?,\s+load averages?: ([0-9]+\.[0-9][0-9]),?\s+([0-9]+\.[0-9][0-9]),?\s+([0-9]+\.[0-9][0-9])'
  2098. output = os.popen('uptime').read()
  2099. duration, _, _, _, _ = re.search(pattern, output).groups()
  2100. days, hours, mins = 0, 0, 0
  2101. if 'day' in duration:
  2102. m = re.search(r'([0-9]+)\s+day', duration)
  2103. days = int(m.group(1))
  2104. if ':' in duration:
  2105. m = re.search(r'([0-9]+):([0-9]+)', duration)
  2106. hours = int(m.group(1))
  2107. mins = int(m.group(2))
  2108. if 'min' in duration:
  2109. m = re.search(r'([0-9]+)\s+min', duration)
  2110. mins = int(m.group(1))
  2111. return days * 86400 + hours * 3600 + mins * 60
  2112. else:
  2113. #TODO: support other platforms
  2114. return None
  2115. # GAEProxy Patch
  2116. # No PAC
  2117. def get_process_list():
  2118. import os
  2119. import glob
  2120. import ctypes
  2121. import collections
  2122. Process = collections.namedtuple('Process', 'pid name exe')
  2123. process_list = []
  2124. if os.name == 'nt':
  2125. PROCESS_QUERY_INFORMATION = 0x0400
  2126. PROCESS_VM_READ = 0x0010
  2127. lpidProcess= (ctypes.c_ulong * 1024)()
  2128. cb = ctypes.sizeof(lpidProcess)
  2129. cbNeeded = ctypes.c_ulong()
  2130. ctypes.windll.psapi.EnumProcesses(ctypes.byref(lpidProcess), cb, ctypes.byref(cbNeeded))
  2131. nReturned = cbNeeded.value/ctypes.sizeof(ctypes.c_ulong())
  2132. pidProcess = [i for i in lpidProcess][:nReturned]
  2133. has_queryimage = hasattr(ctypes.windll.kernel32, 'QueryFullProcessImageNameA')
  2134. for pid in pidProcess:
  2135. hProcess = ctypes.windll.kernel32.OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, 0, pid)
  2136. if hProcess:
  2137. modname = ctypes.create_string_buffer(2048)
  2138. count = ctypes.c_ulong(ctypes.sizeof(modname))
  2139. if has_queryimage:
  2140. ctypes.windll.kernel32.QueryFullProcessImageNameA(hProcess, 0, ctypes.byref(modname), ctypes.byref(count))
  2141. else:
  2142. ctypes.windll.psapi.GetModuleFileNameExA(hProcess, 0, ctypes.byref(modname), ctypes.byref(count))
  2143. exe = modname.value
  2144. name = os.path.basename(exe)
  2145. process_list.append(Process(pid=pid, name=name, exe=exe))
  2146. ctypes.windll.kernel32.CloseHandle(hProcess)
  2147. elif sys.platform.startswith('linux'):
  2148. for filename in glob.glob('/proc/[0-9]*/cmdline'):
  2149. pid = int(filename.split('/')[2])
  2150. exe_link = '/proc/%d/exe' % pid
  2151. if os.path.exists(exe_link):
  2152. exe = os.readlink(exe_link)
  2153. name = os.path.basename(exe)
  2154. process_list.append(Process(pid=pid, name=name, exe=exe))
  2155. else:
  2156. try:
  2157. import psutil
  2158. process_list = psutil.get_process_list()
  2159. except Exception as e:
  2160. logging.exception('psutil.get_process_list() failed: %r', e)
  2161. return process_list
  2162. def pre_start():
  2163. # GAEProxy Patch
  2164. if common.GAE_APPIDS[0] == 'goagent':
  2165. logging.critical('please edit %s to add your appid to [gae] !', common.CONFIG_FILENAME)
  2166. sys.exit(-1)
  2167. if common.GAE_MODE == 'http' and common.GAE_PASSWORD == '':
  2168. logging.critical('to enable http mode, you should set %r [gae]password = <your_pass> and [gae]options = rc4', common.CONFIG_FILENAME)
  2169. sys.exit(-1)
  2170. # GAEProxy Patch
  2171. # No PAC
  2172. # No dnslib
  2173. if os.name == 'nt' and not common.DNS_ENABLE:
  2174. any(common.DNS_SERVERS.insert(0, x) for x in [y for y in get_dnsserver_list() if y not in common.DNS_SERVERS])
  2175. if not OpenSSL:
  2176. logging.warning('python-openssl not found, please install it!')
  2177. if 'uvent.loop' in sys.modules and isinstance(gevent.get_hub().loop, __import__('uvent').loop.UVLoop):
  2178. logging.info('Uvent enabled, patch forward_socket')
  2179. http_util.forward_socket = http_util.green_forward_socket
  2180. def main():
  2181. global __file__
  2182. __file__ = os.path.abspath(__file__)
  2183. if os.path.islink(__file__):
  2184. __file__ = getattr(os, 'readlink', lambda x: x)(__file__)
  2185. os.chdir(os.path.dirname(os.path.abspath(__file__)))
  2186. logging.basicConfig(level=logging.DEBUG if common.LISTEN_DEBUGINFO else logging.INFO, format='%(levelname)s - %(asctime)s %(message)s', datefmt='[%b %d %H:%M:%S]')
  2187. pre_start()
  2188. CertUtil.check_ca()
  2189. sys.stdout.write(common.info())
  2190. # GAEProxy Patch
  2191. # Do the UNIX double-fork magic.
  2192. try:
  2193. pid = os.fork()
  2194. if pid > 0:
  2195. # exit first parent
  2196. sys.exit(0)
  2197. except OSError, e:
  2198. print >>sys.stderr, "fork #1 failed: %d (%s)" % (e.errno, e.strerror)
  2199. sys.exit(1)
  2200. # decouple from parent environment
  2201. os.setsid()
  2202. os.umask(0)
  2203. # do second fork
  2204. try:
  2205. pid = os.fork()
  2206. if pid > 0:
  2207. sys.exit(0)
  2208. except OSError, e:
  2209. print >>sys.stderr, "fork #2 failed: %d (%s)" % (e.errno, e.strerror)
  2210. sys.exit(1)
  2211. # GAEProxy Patch
  2212. pid = str(os.getpid())
  2213. f = open('/data/data/org.gaeproxy/python.pid','a')
  2214. f.write(" ")
  2215. f.write(pid)
  2216. f.close()
  2217. # GAEProxy Patch
  2218. # No dnslib
  2219. # GAEProxy Patch
  2220. if common.PHP_ENABLE:
  2221. host, port = common.PHP_LISTEN.split(':')
  2222. server = LocalProxyServer((host, int(port)), PHPProxyHandler)
  2223. server.serve_forever()
  2224. else:
  2225. server = LocalProxyServer((common.LISTEN_IP, common.LISTEN_PORT), GAEProxyHandler)
  2226. server.serve_forever()
  2227. if __name__ == '__main__':
  2228. main()