
/local/proxy.py

https://github.com/bjumpper/goagent
Python | 3095 lines | 3008 code | 41 blank | 46 comment | 106 complexity | c1af3c78cac0f97438b1b25490f10db5 MD5 | raw file
#!/usr/bin/env python
# coding:utf-8
# Based on GAppProxy 2.0.0 by Du XiaoGang <dugang.2008@gmail.com>
# Based on WallProxy 0.4.0 by Hust Moon <www.ehust@gmail.com>
# Contributor:
# Phus Lu <phus.lu@gmail.com>
# Hewig Xu <hewigovens@gmail.com>
# Ayanamist Yang <ayanamist@gmail.com>
# V.E.O <V.E.O@tom.com>
# Max Lv <max.c.lv@gmail.com>
# AlsoTang <alsotang@gmail.com>
# Christopher Meng <i@cicku.me>
# Yonsm Guo <YonsmGuo@gmail.com>
# Parkman <cseparkman@gmail.com>
# Ming Bai <mbbill@gmail.com>
# Bin Yu <yubinlove1991@gmail.com>
# lileixuan <lileixuan@gmail.com>
# Cong Ding <cong@cding.org>
# Zhang Youfu <zhangyoufu@gmail.com>
# Lu Wei <luwei@barfoo>
# Harmony Meow <harmony.meow@gmail.com>
# logostream <logostream@gmail.com>
# Rui Wang <isnowfy@gmail.com>
# Wang Wei Qiang <wwqgtxx@gmail.com>
# Felix Yan <felixonmars@gmail.com>
# Sui Feng <suifeng.me@qq.com>
# QXO <qxodream@gmail.com>
# Geek An <geekan@foxmail.com>
# Poly Rabbit <mcx_221@foxmail.com>
# oxnz <yunxinyi@gmail.com>
# Shusen Liu <liushusen.smart@gmail.com>
# Yad Smood <y.s.inside@gmail.com>
# Chen Shuang <cs0x7f@gmail.com>
# cnfuyu <cnfuyu@gmail.com>
# cuixin <steven.cuixin@gmail.com>
# s2marine0 <s2marine0@gmail.com>
# Toshio Xiang <snachx@gmail.com>
# Bo Tian <dxmtb@163.com>
# Virgil <variousvirgil@gmail.com>
# hub01 <miaojiabumiao@yeah.net>
# v3aqb <sgzz.cj@gmail.com>
# Oling Cat <olingcat@gmail.com>
__version__ = '3.1.11'
import sys
import os
import glob
reload(sys).setdefaultencoding('UTF-8')
sys.dont_write_bytecode = True
sys.path += glob.glob('%s/*.egg' % os.path.dirname(os.path.abspath(__file__)))
try:
    import gevent
    import gevent.socket
    import gevent.server
    import gevent.queue
    import gevent.monkey
    gevent.monkey.patch_all(subprocess=True)
except ImportError:
    gevent = None
except TypeError:
    gevent.monkey.patch_all()
    sys.stderr.write('\033[31m Warning: Please update gevent to the latest 1.0 version!\033[0m\n')
import errno
import time
import struct
import collections
import binascii
import zlib
import itertools
import re
import io
import fnmatch
import traceback
import random
import base64
import string
import hashlib
import threading
import thread
import socket
import ssl
import select
import Queue
import SocketServer
import ConfigParser
import BaseHTTPServer
import httplib
import urllib2
import urlparse
try:
    import dnslib
except ImportError:
    dnslib = None
try:
    import OpenSSL
except ImportError:
    OpenSSL = None
try:
    import pygeoip
except ImportError:
    pygeoip = None
HAS_PYPY = hasattr(sys, 'pypy_version_info')
NetWorkIOError = (socket.error, ssl.SSLError, OSError) if not OpenSSL else (socket.error, ssl.SSLError, OpenSSL.SSL.Error, OSError)
class Logging(type(sys)):
    CRITICAL = 50
    FATAL = CRITICAL
    ERROR = 40
    WARNING = 30
    WARN = WARNING
    INFO = 20
    DEBUG = 10
    NOTSET = 0
    def __init__(self, *args, **kwargs):
        self.level = self.__class__.INFO
        self.__set_error_color = lambda: None
        self.__set_warning_color = lambda: None
        self.__set_debug_color = lambda: None
        self.__reset_color = lambda: None
        if hasattr(sys.stderr, 'isatty') and sys.stderr.isatty():
            if os.name == 'nt':
                import ctypes
                SetConsoleTextAttribute = ctypes.windll.kernel32.SetConsoleTextAttribute
                GetStdHandle = ctypes.windll.kernel32.GetStdHandle
                self.__set_error_color = lambda: SetConsoleTextAttribute(GetStdHandle(-11), 0x04)
                self.__set_warning_color = lambda: SetConsoleTextAttribute(GetStdHandle(-11), 0x06)
                self.__set_debug_color = lambda: SetConsoleTextAttribute(GetStdHandle(-11), 0x002)
                self.__reset_color = lambda: SetConsoleTextAttribute(GetStdHandle(-11), 0x07)
            elif os.name == 'posix':
                self.__set_error_color = lambda: sys.stderr.write('\033[31m')
                self.__set_warning_color = lambda: sys.stderr.write('\033[33m')
                self.__set_debug_color = lambda: sys.stderr.write('\033[32m')
                self.__reset_color = lambda: sys.stderr.write('\033[0m')
    @classmethod
    def getLogger(cls, *args, **kwargs):
        return cls(*args, **kwargs)
    def basicConfig(self, *args, **kwargs):
        self.level = int(kwargs.get('level', self.__class__.INFO))
        if self.level > self.__class__.DEBUG:
            self.debug = self.dummy
    def log(self, level, fmt, *args, **kwargs):
        sys.stderr.write('%s - [%s] %s\n' % (level, time.ctime()[4:-5], fmt % args))
    def dummy(self, *args, **kwargs):
        pass
    def debug(self, fmt, *args, **kwargs):
        self.__set_debug_color()
        self.log('DEBUG', fmt, *args, **kwargs)
        self.__reset_color()
    def info(self, fmt, *args, **kwargs):
        self.log('INFO', fmt, *args)
    def warning(self, fmt, *args, **kwargs):
        self.__set_warning_color()
        self.log('WARNING', fmt, *args, **kwargs)
        self.__reset_color()
    def warn(self, fmt, *args, **kwargs):
        self.warning(fmt, *args, **kwargs)
    def error(self, fmt, *args, **kwargs):
        self.__set_error_color()
        self.log('ERROR', fmt, *args, **kwargs)
        self.__reset_color()
    def exception(self, fmt, *args, **kwargs):
        self.error(fmt, *args, **kwargs)
        sys.stderr.write(traceback.format_exc() + '\n')
    def critical(self, fmt, *args, **kwargs):
        self.__set_error_color()
        self.log('CRITICAL', fmt, *args, **kwargs)
        self.__reset_color()
logging = sys.modules['logging'] = Logging('logging')
class LRUCache(object):
    """http://pypi.python.org/pypi/lru/"""
    def __init__(self, max_items=100):
        self.cache = {}
        self.key_order = []
        self.max_items = max_items
    def __setitem__(self, key, value):
        self.cache[key] = value
        self._mark(key)
    def __getitem__(self, key):
        value = self.cache[key]
        self._mark(key)
        return value
    def __contains__(self, key):
        return key in self.cache
    def _mark(self, key):
        if key in self.key_order:
            self.key_order.remove(key)
        self.key_order.insert(0, key)
        if len(self.key_order) > self.max_items:
            index = self.max_items // 2
            delitem = self.cache.__delitem__
            key_order = self.key_order
            any(delitem(key_order[x]) for x in xrange(index, len(key_order)))
            self.key_order = self.key_order[:index]
    def clear(self):
        self.cache = {}
        self.key_order = []
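# --- Editor's note: illustrative sketch, not part of upstream goagent. ---
# LRUCache keeps at most `max_items` keys; once the limit is exceeded, _mark()
# drops the older half of the keys in one sweep. A minimal usage sketch (the
# hypothetical _demo_lru_cache helper below is never called by this module):
def _demo_lru_cache():
    cache = LRUCache(max_items=4)
    for host in ('a.com', 'b.com', 'c.com', 'd.com', 'e.com'):
        cache[host] = [host]
    # 'e.com' was inserted last and survives; 'a.com' fell into the evicted half
    return 'e.com' in cache, 'a.com' in cache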
class CertUtil(object):
    """CertUtil module, based on mitmproxy"""
    ca_vendor = 'GoAgent'
    ca_keyfile = 'CA.crt'
    ca_certdir = 'certs'
    ca_lock = threading.Lock()
    @staticmethod
    def create_ca():
        key = OpenSSL.crypto.PKey()
        key.generate_key(OpenSSL.crypto.TYPE_RSA, 2048)
        ca = OpenSSL.crypto.X509()
        ca.set_serial_number(0)
        ca.set_version(2)
        subj = ca.get_subject()
        subj.countryName = 'CN'
        subj.stateOrProvinceName = 'Internet'
        subj.localityName = 'Cernet'
        subj.organizationName = CertUtil.ca_vendor
        subj.organizationalUnitName = '%s Root' % CertUtil.ca_vendor
        subj.commonName = '%s CA' % CertUtil.ca_vendor
        ca.gmtime_adj_notBefore(0)
        ca.gmtime_adj_notAfter(24 * 60 * 60 * 3652)
        ca.set_issuer(ca.get_subject())
        ca.set_pubkey(key)
        ca.add_extensions([
            OpenSSL.crypto.X509Extension(b'basicConstraints', True, b'CA:TRUE'),
            OpenSSL.crypto.X509Extension(b'nsCertType', True, b'sslCA'),
            OpenSSL.crypto.X509Extension(b'extendedKeyUsage', True, b'serverAuth,clientAuth,emailProtection,timeStamping,msCodeInd,msCodeCom,msCTLSign,msSGC,msEFS,nsSGC'),
            OpenSSL.crypto.X509Extension(b'keyUsage', False, b'keyCertSign, cRLSign'),
            OpenSSL.crypto.X509Extension(b'subjectKeyIdentifier', False, b'hash', subject=ca), ])
        ca.sign(key, 'sha1')
        return key, ca
    @staticmethod
    def dump_ca():
        key, ca = CertUtil.create_ca()
        with open(CertUtil.ca_keyfile, 'wb') as fp:
            fp.write(OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, ca))
            fp.write(OpenSSL.crypto.dump_privatekey(OpenSSL.crypto.FILETYPE_PEM, key))
    @staticmethod
    def _get_cert(commonname, sans=()):
        with open(CertUtil.ca_keyfile, 'rb') as fp:
            content = fp.read()
            key = OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, content)
            ca = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, content)
        pkey = OpenSSL.crypto.PKey()
        pkey.generate_key(OpenSSL.crypto.TYPE_RSA, 2048)
        req = OpenSSL.crypto.X509Req()
        subj = req.get_subject()
        subj.countryName = 'CN'
        subj.stateOrProvinceName = 'Internet'
        subj.localityName = 'Cernet'
        subj.organizationalUnitName = '%s Branch' % CertUtil.ca_vendor
        if commonname[0] == '.':
            subj.commonName = '*' + commonname
            subj.organizationName = '*' + commonname
            sans = ['*'+commonname] + [x for x in sans if x != '*'+commonname]
        else:
            subj.commonName = commonname
            subj.organizationName = commonname
            sans = [commonname] + [x for x in sans if x != commonname]
        #req.add_extensions([OpenSSL.crypto.X509Extension(b'subjectAltName', True, ', '.join('DNS: %s' % x for x in sans)).encode()])
        req.set_pubkey(pkey)
        req.sign(pkey, 'sha1')
        cert = OpenSSL.crypto.X509()
        cert.set_version(2)
        try:
            cert.set_serial_number(int(hashlib.md5(commonname.encode('utf-8')).hexdigest(), 16))
        except OpenSSL.SSL.Error:
            cert.set_serial_number(int(time.time()*1000))
        cert.gmtime_adj_notBefore(0)
        cert.gmtime_adj_notAfter(60 * 60 * 24 * 3652)
        cert.set_issuer(ca.get_subject())
        cert.set_subject(req.get_subject())
        cert.set_pubkey(req.get_pubkey())
        if commonname[0] == '.':
            sans = ['*'+commonname] + [s for s in sans if s != '*'+commonname]
        else:
            sans = [commonname] + [s for s in sans if s != commonname]
        #cert.add_extensions([OpenSSL.crypto.X509Extension(b'subjectAltName', True, ', '.join('DNS: %s' % x for x in sans))])
        cert.sign(key, 'sha1')
        certfile = os.path.join(CertUtil.ca_certdir, commonname + '.crt')
        with open(certfile, 'wb') as fp:
            fp.write(OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, cert))
            fp.write(OpenSSL.crypto.dump_privatekey(OpenSSL.crypto.FILETYPE_PEM, pkey))
        return certfile
    @staticmethod
    def get_cert(commonname, sans=()):
        if commonname.count('.') >= 2 and [len(x) for x in reversed(commonname.split('.'))] > [2, 4]:
            commonname = '.'+commonname.partition('.')[-1]
        certfile = os.path.join(CertUtil.ca_certdir, commonname + '.crt')
        if os.path.exists(certfile):
            return certfile
        elif OpenSSL is None:
            return CertUtil.ca_keyfile
        else:
            with CertUtil.ca_lock:
                if os.path.exists(certfile):
                    return certfile
                return CertUtil._get_cert(commonname, sans)
    @staticmethod
    def import_ca(certfile):
        commonname = os.path.splitext(os.path.basename(certfile))[0]
        sha1digest = 'AB:70:2C:DF:18:EB:E8:B4:38:C5:28:69:CD:4A:5D:EF:48:B4:0E:33'
        if OpenSSL:
            try:
                with open(certfile, 'rb') as fp:
                    x509 = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, fp.read())
                    commonname = next(v.decode() for k, v in x509.get_subject().get_components() if k == b'O')
                    sha1digest = x509.digest('sha1')
            except StandardError as e:
                logging.error('load_certificate(certfile=%r) failed:%s', certfile, e)
        if sys.platform.startswith('win'):
            import ctypes
            with open(certfile, 'rb') as fp:
                certdata = fp.read()
                if certdata.startswith(b'-----'):
                    begin = b'-----BEGIN CERTIFICATE-----'
                    end = b'-----END CERTIFICATE-----'
                    certdata = base64.b64decode(b''.join(certdata[certdata.find(begin)+len(begin):certdata.find(end)].strip().splitlines()))
                crypt32 = ctypes.WinDLL(b'crypt32.dll'.decode())
                store_handle = crypt32.CertOpenStore(10, 0, 0, 0x4000 | 0x20000, b'ROOT'.decode())
                if not store_handle:
                    return -1
                X509_ASN_ENCODING = 0x00000001
                CERT_FIND_HASH = 0x10000
                class CRYPT_HASH_BLOB(ctypes.Structure):
                    _fields_ = [('cbData', ctypes.c_ulong), ('pbData', ctypes.c_char_p)]
                crypt_hash = CRYPT_HASH_BLOB(20, binascii.a2b_hex(sha1digest.replace(':', '')))
                crypt_handle = crypt32.CertFindCertificateInStore(store_handle, X509_ASN_ENCODING, 0, CERT_FIND_HASH, ctypes.byref(crypt_hash), None)
                if crypt_handle:
                    crypt32.CertFreeCertificateContext(crypt_handle)
                    return 0
                ret = crypt32.CertAddEncodedCertificateToStore(store_handle, 0x1, certdata, len(certdata), 4, None)
                crypt32.CertCloseStore(store_handle, 0)
                del crypt32
                return 0 if ret else -1
        elif sys.platform == 'darwin':
            return os.system(('security find-certificate -a -c "%s" | grep "%s" >/dev/null || security add-trusted-cert -d -r trustRoot -k "/Library/Keychains/System.keychain" "%s"' % (commonname, commonname, certfile.decode('utf-8'))).encode('utf-8'))
        elif sys.platform.startswith('linux'):
            import platform
            platform_distname = platform.dist()[0]
            if platform_distname == 'Ubuntu':
                pemfile = "/etc/ssl/certs/%s.pem" % commonname
                new_certfile = "/usr/local/share/ca-certificates/%s.crt" % commonname
                if not os.path.exists(pemfile):
                    return os.system('cp "%s" "%s" && update-ca-certificates' % (certfile, new_certfile))
            elif any(os.path.isfile('%s/certutil' % x) for x in os.environ['PATH'].split(os.pathsep)):
                return os.system('certutil -L -d sql:$HOME/.pki/nssdb | grep "%s" || certutil -d sql:$HOME/.pki/nssdb -A -t "C,," -n "%s" -i "%s"' % (commonname, commonname, certfile))
            else:
                logging.warning('please install *libnss3-tools* package to import GoAgent root ca')
        return 0
    @staticmethod
    def check_ca():
        #Check CA exists
        capath = os.path.join(os.path.dirname(os.path.abspath(__file__)), CertUtil.ca_keyfile)
        certdir = os.path.join(os.path.dirname(os.path.abspath(__file__)), CertUtil.ca_certdir)
        if not os.path.exists(capath):
            if not OpenSSL:
                logging.critical('CA.key is not exist and OpenSSL is disabled, ABORT!')
                sys.exit(-1)
            if os.path.exists(certdir):
                if os.path.isdir(certdir):
                    any(os.remove(x) for x in glob.glob(certdir+'/*.crt')+glob.glob(certdir+'/.*.crt'))
                else:
                    os.remove(certdir)
                    os.mkdir(certdir)
            CertUtil.dump_ca()
        if glob.glob('%s/*.key' % CertUtil.ca_certdir):
            for filename in glob.glob('%s/*.key' % CertUtil.ca_certdir):
                try:
                    os.remove(filename)
                    os.remove(os.path.splitext(filename)[0]+'.crt')
                except EnvironmentError:
                    pass
        #Check CA imported
        if CertUtil.import_ca(capath) != 0:
            logging.warning('install root certificate failed, Please run as administrator/root/sudo')
        #Check Certs Dir
        if not os.path.exists(certdir):
            os.makedirs(certdir)
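# --- Editor's note: illustrative sketch, not part of upstream goagent. ---
# check_ca() generates CA.crt next to this file on first run, clears stale
# certs/, and tries to import the root into the system/NSS trust store;
# get_cert() then mints (and caches) a per-host leaf certificate, collapsing
# hosts with three or more labels into a wildcard name. A minimal sketch,
# assuming pyOpenSSL is available (hypothetical helper, never called here):
def _demo_certutil():
    CertUtil.check_ca()
    certfile = CertUtil.get_cert('www.example.com')   # typically certs/.example.com.crt
    return certfile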
class DetectMobileBrowser:
    """detect mobile function from http://detectmobilebrowsers.com"""
    regex_match_a = re.compile(r"(android|bb\\d+|meego).+mobile|avantgo|bada\\/|blackberry|blazer|compal|elaine|fennec|hiptop|iemobile|ip(hone|od)|iris|kindle|lge |maemo|midp|mmp|mobile.+firefox|netfront|opera m(ob|in)i|palm( os)?|phone|p(ixi|re)\\/|plucker|pocket|psp|series(4|6)0|symbian|treo|up\\.(browser|link)|vodafone|wap|windows (ce|phone)|xda|xiino", re.I|re.M).search
    regex_match_b = re.compile(r"1207|6310|6590|3gso|4thp|50[1-6]i|770s|802s|a wa|abac|ac(er|oo|s\\-)|ai(ko|rn)|al(av|ca|co)|amoi|an(ex|ny|yw)|aptu|ar(ch|go)|as(te|us)|attw|au(di|\\-m|r |s )|avan|be(ck|ll|nq)|bi(lb|rd)|bl(ac|az)|br(e|v)w|bumb|bw\\-(n|u)|c55\\/|capi|ccwa|cdm\\-|cell|chtm|cldc|cmd\\-|co(mp|nd)|craw|da(it|ll|ng)|dbte|dc\\-s|devi|dica|dmob|do(c|p)o|ds(12|\\-d)|el(49|ai)|em(l2|ul)|er(ic|k0)|esl8|ez([4-7]0|os|wa|ze)|fetc|fly(\\-|_)|g1 u|g560|gene|gf\\-5|g\\-mo|go(\\.w|od)|gr(ad|un)|haie|hcit|hd\\-(m|p|t)|hei\\-|hi(pt|ta)|hp( i|ip)|hs\\-c|ht(c(\\-| |_|a|g|p|s|t)|tp)|hu(aw|tc)|i\\-(20|go|ma)|i230|iac( |\\-|\\/)|ibro|idea|ig01|ikom|im1k|inno|ipaq|iris|ja(t|v)a|jbro|jemu|jigs|kddi|keji|kgt( |\\/)|klon|kpt |kwc\\-|kyo(c|k)|le(no|xi)|lg( g|\\/(k|l|u)|50|54|\\-[a-w])|libw|lynx|m1\\-w|m3ga|m50\\/|ma(te|ui|xo)|mc(01|21|ca)|m\\-cr|me(rc|ri)|mi(o8|oa|ts)|mmef|mo(01|02|bi|de|do|t(\\-| |o|v)|zz)|mt(50|p1|v )|mwbp|mywa|n10[0-2]|n20[2-3]|n30(0|2)|n50(0|2|5)|n7(0(0|1)|10)|ne((c|m)\\-|on|tf|wf|wg|wt)|nok(6|i)|nzph|o2im|op(ti|wv)|oran|owg1|p800|pan(a|d|t)|pdxg|pg(13|\\-([1-8]|c))|phil|pire|pl(ay|uc)|pn\\-2|po(ck|rt|se)|prox|psio|pt\\-g|qa\\-a|qc(07|12|21|32|60|\\-[2-7]|i\\-)|qtek|r380|r600|raks|rim9|ro(ve|zo)|s55\\/|sa(ge|ma|mm|ms|ny|va)|sc(01|h\\-|oo|p\\-)|sdk\\/|se(c(\\-|0|1)|47|mc|nd|ri)|sgh\\-|shar|sie(\\-|m)|sk\\-0|sl(45|id)|sm(al|ar|b3|it|t5)|so(ft|ny)|sp(01|h\\-|v\\-|v )|sy(01|mb)|t2(18|50)|t6(00|10|18)|ta(gt|lk)|tcl\\-|tdg\\-|tel(i|m)|tim\\-|t\\-mo|to(pl|sh)|ts(70|m\\-|m3|m5)|tx\\-9|up(\\.b|g1|si)|utst|v400|v750|veri|vi(rg|te)|vk(40|5[0-3]|\\-v)|vm40|voda|vulc|vx(52|53|60|61|70|80|81|83|85|98)|w3c(\\-| )|webc|whit|wi(g |nc|nw)|wmlb|wonu|x700|yas\\-|your|zeto|zte\\-", re.I|re.M).search
    @staticmethod
    def detect(user_agent):
        return DetectMobileBrowser.regex_match_a(user_agent) or DetectMobileBrowser.regex_match_b(user_agent)
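# --- Editor's note: illustrative sketch, not part of upstream goagent. ---
# DetectMobileBrowser.detect() returns a truthy regex match for mobile
# User-Agent strings (hypothetical helper, never called here):
def _demo_detect_mobile():
    ua = 'Mozilla/5.0 (iPhone; CPU iPhone OS 7_0 like Mac OS X) AppleWebKit/537.51.1 Mobile/11A465'
    return bool(DetectMobileBrowser.detect(ua))   # matches the 'ip(hone|od)' alternative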
class SSLConnection(object):
    has_gevent = socket.socket is getattr(sys.modules.get('gevent.socket'), 'socket', None)
    def __init__(self, context, sock):
        self._context = context
        self._sock = sock
        self._connection = OpenSSL.SSL.Connection(context, sock)
        self._makefile_refs = 0
        if self.has_gevent:
            self._wait_read = gevent.socket.wait_read
            self._wait_write = gevent.socket.wait_write
            self._wait_readwrite = gevent.socket.wait_readwrite
        else:
            self._wait_read = lambda fd,t: select.select([fd], [], [fd], t)
            self._wait_write = lambda fd,t: select.select([], [fd], [fd], t)
            self._wait_readwrite = lambda fd,t: select.select([fd], [fd], [fd], t)
    def __getattr__(self, attr):
        if attr not in ('_context', '_sock', '_connection', '_makefile_refs'):
            return getattr(self._connection, attr)
    def accept(self):
        sock, addr = self._sock.accept()
        client = OpenSSL.SSL.Connection(sock._context, sock)
        return client, addr
    def do_handshake(self):
        timeout = self._sock.gettimeout()
        while True:
            try:
                self._connection.do_handshake()
                break
            except (OpenSSL.SSL.WantReadError, OpenSSL.SSL.WantX509LookupError, OpenSSL.SSL.WantWriteError):
                sys.exc_clear()
                self._wait_readwrite(self._sock.fileno(), timeout)
    def connect(self, *args, **kwargs):
        timeout = self._sock.gettimeout()
        while True:
            try:
                self._connection.connect(*args, **kwargs)
                break
            except (OpenSSL.SSL.WantReadError, OpenSSL.SSL.WantX509LookupError):
                sys.exc_clear()
                self._wait_read(self._sock.fileno(), timeout)
            except OpenSSL.SSL.WantWriteError:
                sys.exc_clear()
                self._wait_write(self._sock.fileno(), timeout)
    def send(self, data, flags=0):
        timeout = self._sock.gettimeout()
        while True:
            try:
                self._connection.send(data, flags)
                break
            except (OpenSSL.SSL.WantReadError, OpenSSL.SSL.WantX509LookupError):
                sys.exc_clear()
                self._wait_read(self._sock.fileno(), timeout)
            except OpenSSL.SSL.WantWriteError:
                sys.exc_clear()
                self._wait_write(self._sock.fileno(), timeout)
            except OpenSSL.SSL.SysCallError as e:
                if e[0] == -1 and not data:
                    # errors when writing empty strings are expected and can be ignored
                    return 0
                raise
    def recv(self, bufsiz, flags=0):
        timeout = self._sock.gettimeout()
        pending = self._connection.pending()
        if pending:
            return self._connection.recv(min(pending, bufsiz))
        while True:
            try:
                return self._connection.recv(bufsiz, flags)
            except (OpenSSL.SSL.WantReadError, OpenSSL.SSL.WantX509LookupError):
                sys.exc_clear()
                self._wait_read(self._sock.fileno(), timeout)
            except OpenSSL.SSL.WantWriteError:
                sys.exc_clear()
                self._wait_write(self._sock.fileno(), timeout)
            except OpenSSL.SSL.ZeroReturnError:
                return ''
    def read(self, bufsiz, flags=0):
        return self.recv(bufsiz, flags)
    def write(self, buf, flags=0):
        return self.sendall(buf, flags)
    def close(self):
        if self._makefile_refs < 1:
            self._connection = None
            if self._sock:
                socket.socket.close(self._sock)
        else:
            self._makefile_refs -= 1
    def makefile(self, mode='r', bufsize=-1):
        self._makefile_refs += 1
        return socket._fileobject(self, mode, bufsize, close=True)
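# --- Editor's note: illustrative sketch, not part of upstream goagent. ---
# SSLConnection wraps a pyOpenSSL connection so that WantRead/WantWrite are
# retried via select() (or gevent waiters), letting it behave like a blocking
# socket. A minimal client-side sketch, assuming pyOpenSSL and network access
# (hypothetical helper, never called here):
def _demo_sslconnection(host='www.example.com'):
    context = OpenSSL.SSL.Context(OpenSSL.SSL.TLSv1_METHOD)
    sock = socket.create_connection((host, 443), timeout=8)
    ssl_sock = SSLConnection(context, sock)
    ssl_sock.set_connect_state()          # proxied to the underlying OpenSSL connection
    ssl_sock.do_handshake()               # retries on WantRead/WantWrite as defined above
    ssl_sock.sendall('HEAD / HTTP/1.1\r\nHost: %s\r\nConnection: close\r\n\r\n' % host)
    data = ssl_sock.read(1024)
    ssl_sock.close()
    return data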
class ProxyUtil(object):
    """ProxyUtil module, based on urllib2"""
    @staticmethod
    def parse_proxy(proxy):
        return urllib2._parse_proxy(proxy)
    @staticmethod
    def get_system_proxy():
        proxies = urllib2.getproxies()
        return proxies.get('https') or proxies.get('http') or {}
    @staticmethod
    def get_listen_ip():
        listen_ip = '127.0.0.1'
        sock = None
        try:
            sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
            sock.connect(('8.8.8.8', 53))
            listen_ip = sock.getsockname()[0]
        except socket.error:
            pass
        finally:
            if sock:
                sock.close()
        return listen_ip
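# --- Editor's note: illustrative sketch, not part of upstream goagent. ---
# parse_proxy() splits a proxy URL into (scheme, user, password, host:port);
# get_listen_ip() "connects" a UDP socket to 8.8.8.8:53 (no packet is sent)
# merely to learn which local address the default route would use
# (hypothetical helper, never called here):
def _demo_proxyutil():
    scheme, username, password, hostport = ProxyUtil.parse_proxy('http://user:pass@127.0.0.1:8087')
    return scheme, hostport, ProxyUtil.get_listen_ip(), ProxyUtil.get_system_proxy()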
def dnslib_resolve_over_udp(qname, dnsservers, timeout, **kwargs):
    """
    http://gfwrev.blogspot.com/2009/11/gfwdns.html
    http://zh.wikipedia.org/wiki/%E5%9F%9F%E5%90%8D%E6%9C%8D%E5%8A%A1%E5%99%A8%E7%BC%93%E5%AD%98%E6%B1%A1%E6%9F%93
    http://support.microsoft.com/kb/241352
    """
    blacklist = kwargs.get('blacklist', ())
    turstservers = kwargs.get('turstservers', ())
    query = dnslib.DNSRecord(q=dnslib.DNSQuestion(qname))
    query_data = query.pack()
    dns_v4_servers = [x for x in dnsservers if ':' not in x]
    dns_v6_servers = [x for x in dnsservers if ':' in x]
    sock_v4 = sock_v6 = None
    socks = []
    if dns_v4_servers:
        sock_v4 = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        socks.append(sock_v4)
    if dns_v6_servers:
        sock_v6 = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM)
        socks.append(sock_v6)
    timeout_at = time.time() + timeout
    try:
        for _ in xrange(4):
            try:
                for dnsserver in dns_v4_servers:
                    sock_v4.sendto(query_data, (dnsserver, 53))
                for dnsserver in dns_v6_servers:
                    sock_v6.sendto(query_data, (dnsserver, 53))
                while time.time() < timeout_at:
                    ins, _, _ = select.select(socks, [], [], 0.1)
                    for sock in ins:
                        reply_data, (reply_server, _) = sock.recvfrom(512)
                        record = dnslib.DNSRecord.parse(reply_data)
                        rtypes = (1, 28) if sock is sock_v6 else (1,)
                        iplist = [str(x.rdata) for x in record.rr if x.rtype in rtypes]
                        if any(x in blacklist for x in iplist):
                            logging.warning('query qname=%r dnsservers=%r record bad iplist=%r', qname, dnsservers, iplist)
                        elif record.header.rcode and not iplist and reply_server in turstservers:
                            logging.info('query qname=%r trust reply_server=%r record rcode=%s', qname, reply_server, record.header.rcode)
                            return record
                        elif iplist:
                            logging.debug('query qname=%r reply_server=%r record iplist=%s', qname, reply_server, iplist)
                            return record
                        else:
                            logging.debug('query qname=%r reply_server=%r record null iplist=%s', qname, reply_server, iplist)
                            continue
            except socket.error as e:
                logging.warning('handle dns query=%s socket: %r', query, e)
        raise socket.gaierror(11004, 'getaddrinfo %r from %r failed' % (qname, dnsservers))
    finally:
        for sock in socks:
            sock.close()
def dnslib_resolve_over_tcp(qname, dnsservers, timeout, **kwargs):
    """dns query over tcp"""
    blacklist = kwargs.get('blacklist', ())
    def do_resolve(qname, dnsserver, timeout, queobj):
        query = dnslib.DNSRecord(q=dnslib.DNSQuestion(qname))
        query_data = query.pack()
        sock_family = socket.AF_INET6 if ':' in dnsserver else socket.AF_INET
        sock = socket.socket(sock_family)
        rfile = None
        try:
            sock.settimeout(timeout or None)
            sock.connect((dnsserver, 53))
            sock.send(struct.pack('>h', len(query_data)) + query_data)
            rfile = sock.makefile('r', 1024)
            reply_data_length = rfile.read(2)
            if len(reply_data_length) < 2:
                raise socket.gaierror(11004, 'getaddrinfo %r from %r failed' % (qname, dnsserver))
            reply_data = rfile.read(struct.unpack('>h', reply_data_length)[0])
            record = dnslib.DNSRecord.parse(reply_data)
            rtypes = (1, 28) if sock_family is socket.AF_INET6 else (1,)
            iplist = [str(x.rdata) for x in record.rr if x.rtype in rtypes]
            if any(x in blacklist for x in iplist):
                logging.debug('query qname=%r dnsserver=%r record bad iplist=%r', qname, dnsserver, iplist)
                raise socket.gaierror(11004, 'getaddrinfo %r from %r failed' % (qname, dnsserver))
            else:
                logging.debug('query qname=%r dnsserver=%r record iplist=%s', qname, dnsserver, iplist)
                queobj.put(record)
        except socket.error as e:
            logging.debug('query qname=%r dnsserver=%r failed %r', qname, dnsserver, e)
            queobj.put(e)
        finally:
            if rfile:
                rfile.close()
            sock.close()
    queobj = Queue.Queue()
    for dnsserver in dnsservers:
        thread.start_new_thread(do_resolve, (qname, dnsserver, timeout, queobj))
    for i in range(len(dnsservers)):
        try:
            result = queobj.get(timeout)
        except Queue.Empty:
            raise socket.gaierror(11004, 'getaddrinfo %r from %r failed' % (qname, dnsservers))
        if result and not isinstance(result, Exception):
            return result
        elif i == len(dnsservers) - 1:
            logging.warning('dnslib_resolve_over_tcp %r with %s return %r', qname, dnsservers, result)
            raise socket.gaierror(11004, 'getaddrinfo %r from %r failed' % (qname, dnsservers))
def dnslib_record2iplist(record):
    """convert dnslib.DNSRecord to iplist"""
    assert isinstance(record, dnslib.DNSRecord)
    return [str(x.rdata) for x in record.rr if x.rtype in (1, 28)]
def get_dnsserver_list():
    if os.name == 'nt':
        import ctypes, ctypes.wintypes, struct, socket
        DNS_CONFIG_DNS_SERVER_LIST = 6
        buf = ctypes.create_string_buffer(2048)
        ctypes.windll.dnsapi.DnsQueryConfig(DNS_CONFIG_DNS_SERVER_LIST, 0, None, None, ctypes.byref(buf), ctypes.byref(ctypes.wintypes.DWORD(len(buf))))
        ips = struct.unpack('I', buf[0:4])[0]
        out = []
        for i in xrange(ips):
            start = (i+1) * 4
            out.append(socket.inet_ntoa(buf[start:start+4]))
        return out
    elif os.path.isfile('/etc/resolv.conf'):
        with open('/etc/resolv.conf', 'rb') as fp:
            return re.findall(r'(?m)^nameserver\s+(\S+)', fp.read())
    else:
        logging.warning("get_dnsserver_list failed: unsupport platform '%s-%s'", sys.platform, os.name)
        return []
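# --- Editor's note: illustrative sketch, not part of upstream goagent. ---
# Typical use of the resolver helpers above: query the system DNS servers over
# UDP (optionally rejecting blacklisted/poisoned answers), fall back to TCP,
# and flatten the answer records into IP strings. Assumes dnslib is installed
# (hypothetical helper, never called here):
def _demo_dns_helpers(qname='www.example.com'):
    servers = get_dnsserver_list() or ['8.8.8.8', '114.114.114.114']
    try:
        record = dnslib_resolve_over_udp(qname, servers, timeout=2, blacklist=())
    except socket.gaierror:
        record = dnslib_resolve_over_tcp(qname, servers, timeout=2, blacklist=())
    return dnslib_record2iplist(record)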
def spawn_later(seconds, target, *args, **kwargs):
    def wrap(*args, **kwargs):
        __import__('time').sleep(seconds)
        return target(*args, **kwargs)
    return __import__('thread').start_new_thread(wrap, args, kwargs)
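# --- Editor's note: illustrative sketch, not part of upstream goagent. ---
# spawn_later() mirrors gevent.spawn_later with a plain thread: it sleeps,
# then calls the target (hypothetical helper, never called here):
def _demo_spawn_later():
    spawn_later(1.0, logging.info, 'spawn_later fired after %s second', 1.0)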
def is_clienthello(data):
    if len(data) < 20:
        return False
    if data.startswith('\x16\x03'):
        # TLSv12/TLSv11/TLSv1/SSLv3
        length, = struct.unpack('>h', data[3:5])
        return len(data) == 5 + length
    elif data[0] == '\x80' and data[2:4] == '\x01\x03':
        # SSLv23
        return len(data) == 2 + ord(data[1])
    else:
        return False
def extract_sni_name(packet):
    if packet.startswith('\x16\x03'):
        stream = io.BytesIO(packet)
        stream.read(0x2b)
        session_id_length = ord(stream.read(1))
        stream.read(session_id_length)
        cipher_suites_length, = struct.unpack('>h', stream.read(2))
        stream.read(cipher_suites_length+2)
        extensions_length, = struct.unpack('>h', stream.read(2))
        extensions = {}
        while True:
            data = stream.read(2)
            if not data:
                break
            etype, = struct.unpack('>h', data)
            elen, = struct.unpack('>h', stream.read(2))
            edata = stream.read(elen)
            if etype == 0:
                server_name = edata[5:]
                return server_name
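# --- Editor's note: illustrative sketch, not part of upstream goagent. ---
# is_clienthello()/extract_sni_name() are used by the handlers below to peek
# at the first bytes of an accepted connection (MSG_PEEK, so nothing is
# consumed) and recover the SNI hostname from a TLS ClientHello
# (hypothetical helper, never called here):
def _demo_peek_sni(sock):
    leaddata = sock.recv(1024, socket.MSG_PEEK)
    if is_clienthello(leaddata):
        return extract_sni_name(leaddata)
    return None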
class URLFetch(object):
    """URLFetch for gae/php fetchservers"""
    skip_headers = frozenset(['Vary', 'Via', 'X-Forwarded-For', 'Proxy-Authorization', 'Proxy-Connection', 'Upgrade', 'X-Chrome-Variations', 'Connection', 'Cache-Control'])
    def __init__(self, fetchserver, create_http_request):
        assert isinstance(fetchserver, basestring) and callable(create_http_request)
        self.fetchserver = fetchserver
        self.create_http_request = create_http_request
    def fetch(self, method, url, headers, body, timeout, **kwargs):
        if '.appspot.com/' in self.fetchserver:
            response = self.__gae_fetch(method, url, headers, body, timeout, **kwargs)
            response.app_header_parsed = True
        else:
            response = self.__php_fetch(method, url, headers, body, timeout, **kwargs)
            response.app_header_parsed = False
        return response
    def __gae_fetch(self, method, url, headers, body, timeout, **kwargs):
        # deflate = lambda x:zlib.compress(x)[2:-4]
        rc4crypt = lambda s, k: RC4Cipher(k).encrypt(s) if k else s
        if body:
            if len(body) < 10 * 1024 * 1024 and 'Content-Encoding' not in headers:
                zbody = zlib.compress(body)[2:-4]
                if len(zbody) < len(body):
                    body = zbody
                    headers['Content-Encoding'] = 'deflate'
            headers['Content-Length'] = str(len(body))
        # GAE does not allow setting the `Host` header
        if 'Host' in headers:
            del headers['Host']
        metadata = 'G-Method:%s\nG-Url:%s\n%s' % (method, url, ''.join('G-%s:%s\n' % (k, v) for k, v in kwargs.items() if v))
        skip_headers = self.skip_headers
        metadata += ''.join('%s:%s\n' % (k.title(), v) for k, v in headers.items() if k not in skip_headers)
        # prepare GAE request
        request_method = 'POST'
        request_headers = {}
        if common.GAE_OBFUSCATE:
            if 'rc4' in common.GAE_OPTIONS:
                request_headers['X-GOA-Options'] = 'rc4'
                cookie = base64.b64encode(rc4crypt(zlib.compress(metadata)[2:-4], kwargs.get('password'))).strip()
                body = rc4crypt(body, kwargs.get('password'))
            else:
                cookie = base64.b64encode(zlib.compress(metadata)[2:-4]).strip()
            request_headers['Cookie'] = cookie
            if body:
                request_headers['Content-Length'] = str(len(body))
            else:
                request_method = 'GET'
        else:
            metadata = zlib.compress(metadata)[2:-4]
            body = '%s%s%s' % (struct.pack('!h', len(metadata)), metadata, body)
            if 'rc4' in common.GAE_OPTIONS:
                request_headers['X-GOA-Options'] = 'rc4'
                body = rc4crypt(body, kwargs.get('password'))
            request_headers['Content-Length'] = str(len(body))
        # post data
        need_crlf = 0 if common.GAE_MODE == 'https' else 1
        need_validate = common.GAE_VALIDATE
        cache_key = '%s:%d' % (common.HOST_POSTFIX_MAP['.appspot.com'], 443 if common.GAE_MODE == 'https' else 80)
        response = self.create_http_request(request_method, self.fetchserver, request_headers, body, timeout, crlf=need_crlf, validate=need_validate, cache_key=cache_key)
        response.app_status = response.status
        response.app_options = response.getheader('X-GOA-Options', '')
        if response.status != 200:
            return response
        data = response.read(4)
        if len(data) < 4:
            response.status = 502
            response.fp = io.BytesIO(b'connection aborted. too short leadbyte data=' + data)
            response.read = response.fp.read
            return response
        response.status, headers_length = struct.unpack('!hh', data)
        data = response.read(headers_length)
        if len(data) < headers_length:
            response.status = 502
            response.fp = io.BytesIO(b'connection aborted. too short headers data=' + data)
            response.read = response.fp.read
            return response
        if 'rc4' not in response.app_options:
            response.msg = httplib.HTTPMessage(io.BytesIO(zlib.decompress(data, -zlib.MAX_WBITS)))
        else:
            response.msg = httplib.HTTPMessage(io.BytesIO(zlib.decompress(rc4crypt(data, kwargs.get('password')), -zlib.MAX_WBITS)))
            if kwargs.get('password') and response.fp:
                response.fp = CipherFileObject(response.fp, RC4Cipher(kwargs['password']))
        return response
    def __php_fetch(self, method, url, headers, body, timeout, **kwargs):
        if body:
            if len(body) < 10 * 1024 * 1024 and 'Content-Encoding' not in headers:
                zbody = zlib.compress(body)[2:-4]
                if len(zbody) < len(body):
                    body = zbody
                    headers['Content-Encoding'] = 'deflate'
            headers['Content-Length'] = str(len(body))
        skip_headers = self.skip_headers
        metadata = 'G-Method:%s\nG-Url:%s\n%s%s' % (method, url, ''.join('G-%s:%s\n' % (k, v) for k, v in kwargs.items() if v), ''.join('%s:%s\n' % (k, v) for k, v in headers.items() if k not in skip_headers))
        metadata = zlib.compress(metadata)[2:-4]
        app_body = b''.join((struct.pack('!h', len(metadata)), metadata, body))
        app_headers = {'Content-Length': len(app_body), 'Content-Type': 'application/octet-stream'}
        fetchserver = '%s?%s' % (self.fetchserver, random.random())
        crlf = 0
        cache_key = '%s//:%s' % urlparse.urlsplit(fetchserver)[:2]
        response = self.create_http_request('POST', fetchserver, app_headers, app_body, timeout, crlf=crlf, cache_key=cache_key)
        if not response:
            raise socket.error(errno.ECONNRESET, 'urlfetch %r return None' % url)
        if response.status >= 400:
            return response
        response.app_status = response.status
        need_decrypt = kwargs.get('password') and response.app_status == 200 and response.getheader('Content-Type', '') == 'image/gif' and response.fp
        if need_decrypt:
            response.fp = CipherFileObject(response.fp, XORCipher(kwargs['password'][0]))
        return response
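# --- Editor's note: illustrative sketch, not part of upstream goagent. ---
# Both fetch paths above pack request metadata as zlib.compress(x)[2:-4], i.e.
# a raw DEFLATE stream with the 2-byte zlib header and 4-byte checksum
# stripped; the fetchserver inflates it with wbits=-zlib.MAX_WBITS. The GAE
# payload is then a 2-byte length prefix, the metadata, and the body. A
# round-trip sketch (hypothetical helper, never called here):
def _demo_metadata_packing():
    metadata = 'G-Method:GET\nG-Url:http://www.example.com/\n'
    packed = zlib.compress(metadata)[2:-4]
    assert zlib.decompress(packed, -zlib.MAX_WBITS) == metadata
    payload = struct.pack('!h', len(packed)) + packed + 'request-body'
    return len(payload)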
class BaseProxyHandlerFilter(object):
    """base proxy handler filter"""
    def filter(self, handler):
        raise NotImplementedError
class SimpleProxyHandlerFilter(BaseProxyHandlerFilter):
    """simple proxy handler filter"""
    def filter(self, handler):
        if handler.command == 'CONNECT':
            return [handler.FORWARD, handler.host, handler.port, handler.connect_timeout]
        else:
            return [handler.DIRECT, {}]
class AuthFilter(BaseProxyHandlerFilter):
    """authorization filter"""
    auth_info = "Proxy authentication required"""
    white_list = set(['127.0.0.1'])
    def __init__(self, username, password):
        self.username = username
        self.password = password
    def check_auth_header(self, auth_header):
        method, _, auth_data = auth_header.partition(' ')
        if method == 'Basic':
            username, _, password = base64.b64decode(auth_data).partition(':')
            if username == self.username and password == self.password:
                return True
        return False
    def filter(self, handler):
        if self.white_list and handler.client_address[0] in self.white_list:
            return None
        auth_header = handler.headers.get('Proxy-Authorization') or getattr(handler, 'auth_header', None)
        if auth_header and self.check_auth_header(auth_header):
            handler.auth_header = auth_header
        else:
            headers = {'Access-Control-Allow-Origin': '*',
                       'Proxy-Authenticate': 'Basic realm="%s"' % self.auth_info,
                       'Content-Length': '0',
                       'Connection': 'keep-alive'}
            return [handler.MOCK, 407, headers, '']
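# --- Editor's note: illustrative sketch, not part of upstream goagent. ---
# A filter's filter() returns an "action list": the handler method to call
# (MOCK/FORWARD/DIRECT/URLFETCH/...) followed by its arguments; do_METHOD pops
# the first element and calls it with the rest. AuthFilter either lets the
# request through or MOCKs a 407. Checking a Basic credential the way it does
# (hypothetical helper, never called here):
def _demo_auth_filter():
    auth = AuthFilter('user', 'pass')
    header = 'Basic ' + base64.b64encode('user:pass')
    return auth.check_auth_header(header)   # True only for matching credentials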
class SimpleProxyHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    """SimpleProxyHandler for GoAgent 3.x"""
    protocol_version = 'HTTP/1.1'
    ssl_version = ssl.PROTOCOL_SSLv23
    disable_transport_ssl = True
    scheme = 'http'
    skip_headers = frozenset(['Vary', 'Via', 'X-Forwarded-For', 'Proxy-Authorization', 'Proxy-Connection', 'Upgrade', 'X-Chrome-Variations', 'Connection', 'Cache-Control'])
    bufsize = 256 * 1024
    max_timeout = 16
    connect_timeout = 8
    first_run_lock = threading.Lock()
    handler_filters = [SimpleProxyHandlerFilter()]
    sticky_filter = None
    def finish(self):
        """make python2 BaseHTTPRequestHandler happy"""
        try:
            BaseHTTPServer.BaseHTTPRequestHandler.finish(self)
        except NetWorkIOError as e:
            if e[0] not in (errno.ECONNABORTED, errno.ECONNRESET, errno.EPIPE):
                raise
    def address_string(self):
        return '%s:%s' % self.client_address[:2]
    def send_response(self, code, message=None):
        if message is None:
            if code in self.responses:
                message = self.responses[code][0]
            else:
                message = ''
        if self.request_version != 'HTTP/0.9':
            self.wfile.write('%s %d %s\r\n' % (self.protocol_version, code, message))
    def send_header(self, keyword, value):
        """Send a MIME header."""
        base_send_header = BaseHTTPServer.BaseHTTPRequestHandler.send_header
        keyword = keyword.title()
        if keyword == 'Set-Cookie':
            for cookie in re.split(r', (?=[^ =]+(?:=|$))', value):
                base_send_header(self, keyword, cookie)
        elif keyword == 'Content-Disposition' and '"' not in value:
            value = re.sub(r'filename=([^"\']+)', 'filename="\\1"', value)
            base_send_header(self, keyword, value)
        else:
            base_send_header(self, keyword, value)
    def setup(self):
        if isinstance(self.__class__.first_run, collections.Callable):
            try:
                with self.__class__.first_run_lock:
                    if isinstance(self.__class__.first_run, collections.Callable):
                        self.first_run()
                        self.__class__.first_run = None
            except StandardError as e:
                logging.exception('%s.first_run() return %r', self.__class__, e)
        self.__class__.setup = BaseHTTPServer.BaseHTTPRequestHandler.setup
        self.__class__.do_CONNECT = self.__class__.do_METHOD
        self.__class__.do_GET = self.__class__.do_METHOD
        self.__class__.do_PUT = self.__class__.do_METHOD
        self.__class__.do_POST = self.__class__.do_METHOD
        self.__class__.do_HEAD = self.__class__.do_METHOD
        self.__class__.do_DELETE = self.__class__.do_METHOD
        self.__class__.do_OPTIONS = self.__class__.do_METHOD
        self.setup()
    def handle_one_request(self):
        if not self.disable_transport_ssl and self.scheme == 'http':
            leadbyte = self.connection.recv(1, socket.MSG_PEEK)
            if leadbyte in ('\x80', '\x16'):
                server_name = ''
                if leadbyte == '\x16':
                    for _ in xrange(2):
                        leaddata = self.connection.recv(1024, socket.MSG_PEEK)
                        if is_clienthello(leaddata):
                            try:
                                server_name = extract_sni_name(leaddata)
                            finally:
                                break
                try:
                    certfile = CertUtil.get_cert(server_name or 'www.google.com')
                    ssl_sock = ssl.wrap_socket(self.connection, ssl_version=self.ssl_version, keyfile=certfile, certfile=certfile, server_side=True)
                except StandardError as e:
                    if e.args[0] not in (errno.ECONNABORTED, errno.ECONNRESET):
                        logging.exception('ssl.wrap_socket(self.connection=%r) failed: %s', self.connection, e)
                    return
                self.connection = ssl_sock
                self.rfile = self.connection.makefile('rb', self.bufsize)
                self.wfile = self.connection.makefile('wb', 0)
                self.scheme = 'https'
        return BaseHTTPServer.BaseHTTPRequestHandler.handle_one_request(self)
    def first_run(self):
        pass
    def gethostbyname2(self, hostname):
        return socket.gethostbyname_ex(hostname)[-1]
    def create_tcp_connection(self, hostname, port, timeout, **kwargs):
        return socket.create_connection((hostname, port), timeout)
    def create_ssl_connection(self, hostname, port, timeout, **kwargs):
        sock = self.create_tcp_connection(hostname, port, timeout, **kwargs)
        ssl_sock = ssl.wrap_socket(sock, ssl_version=self.ssl_version)
        return ssl_sock
    def create_http_request(self, method, url, headers, body, timeout, **kwargs):
        scheme, netloc, path, query, _ = urlparse.urlsplit(url)
        if netloc.rfind(':') <= netloc.rfind(']'):
            # no port number
            host = netloc
            port = 443 if scheme == 'https' else 80
        else:
            host, _, port = netloc.rpartition(':')
            port = int(port)
        if query:
            path += '?' + query
        if 'Host' not in headers:
            headers['Host'] = host
        if body and 'Content-Length' not in headers:
            headers['Content-Length'] = str(len(body))
        ConnectionType = httplib.HTTPSConnection if scheme == 'https' else httplib.HTTPConnection
        connection = ConnectionType(netloc, timeout=timeout)
        connection.request(method, path, body=body, headers=headers)
        response = connection.getresponse(buffering=True)
        return response
    def create_http_request_withserver(self, fetchserver, method, url, headers, body, timeout, **kwargs):
        return URLFetch(fetchserver, self.create_http_request).fetch(method, url, headers, body, timeout, **kwargs)
    def handle_urlfetch_error(self, fetchserver, response):
        pass
    def handle_urlfetch_response_close(self, fetchserver, response):
        pass
    def parse_header(self):
        if self.command == 'CONNECT':
            netloc = self.path
        elif self.path[0] == '/':
            netloc = self.headers.get('Host', 'localhost')
            self.path = '%s://%s%s' % (self.scheme, netloc, self.path)
        else:
            netloc = urlparse.urlsplit(self.path).netloc
        m = re.match(r'^(.+):(\d+)$', netloc)
        if m:
            self.host = m.group(1).strip('[]')
            self.port = int(m.group(2))
        else:
            self.host = netloc
            self.port = 443 if self.scheme == 'https' else 80
    def forward_socket(self, local, remote, timeout):
        try:
            tick = 1
            bufsize = self.bufsize
            timecount = timeout
            while 1:
                timecount -= tick
                if timecount <= 0:
                    break
                (ins, _, errors) = select.select([local, remote], [], [local, remote], tick)
                if errors:
                    break
                for sock in ins:
                    data = sock.recv(bufsize)
                    if not data:
                        break
                    if sock is remote:
                        local.sendall(data)
                        timecount = timeout
                    else:
                        remote.sendall(data)
                        timecount = timeout
        except socket.timeout:
            pass
        except NetWorkIOError as e:
            if e.args[0] not in (errno.ECONNABORTED, errno.ECONNRESET, errno.ENOTCONN, errno.EPIPE):
                raise
            if e.args[0] in (errno.EBADF,):
                return
        finally:
            for sock in (remote, local):
                try:
                    sock.close()
                except StandardError:
                    pass
    def MOCK(self, status, headers, content):
        """mock response"""
        logging.info('%s "MOCK %s %s %s" %d %d', self.address_string(), self.command, self.path, self.protocol_version, status, len(content))
        headers = {k.title(): v for k, v in headers.items()}
        if 'Transfer-Encoding' in headers:
            del headers['Transfer-Encoding']
        if 'Content-Length' not in headers:
            headers['Content-Length'] = len(content)
        if 'Connection' not in headers:
            headers['Connection'] = 'close'
        self.send_response(status)
        for key, value in headers.items():
            self.send_header(key, value)
        self.end_headers()
        self.wfile.write(content)
    def STRIP(self, do_ssl_handshake=True, sticky_filter=None):
        """strip connect"""
        certfile = CertUtil.get_cert(self.host)
        logging.info('%s "STRIP %s %s:%d %s" - -', self.address_string(), self.command, self.host, self.port, self.protocol_version)
        self.send_response(200)
        self.end_headers()
        if do_ssl_handshake:
            try:
                ssl_sock = ssl.wrap_socket(self.connection, ssl_version=self.ssl_version, keyfile=certfile, certfile=certfile, server_side=True)
            except StandardError as e:
                if e.args[0] not in (errno.ECONNABORTED, errno.ECONNRESET):
                    logging.exception('ssl.wrap_socket(self.connection=%r) failed: %s', self.connection, e)
                return
            self.connection = ssl_sock
            self.rfile = self.connection.makefile('rb', self.bufsize)
            self.wfile = self.connection.makefile('wb', 0)
            self.scheme = 'https'
        try:
            self.raw_requestline = self.rfile.readline(65537)
            if len(self.raw_requestline) > 65536:
                self.requestline = ''
                self.request_version = ''
                self.command = ''
                self.send_error(414)
                return
            if not self.raw_requestline:
                self.close_connection = 1
                return
            if not self.parse_request():
                return
        except NetWorkIOError as e:
            if e.args[0] not in (errno.ECONNABORTED, errno.ECONNRESET, errno.EPIPE):
                raise
        self.sticky_filter = sticky_filter
        try:
            self.do_METHOD()
        except NetWorkIOError as e:
            if e.args[0] not in (errno.ECONNABORTED, errno.ETIMEDOUT, errno.EPIPE):
                raise
    def FORWARD(self, hostname, port, timeout, kwargs={}):
        """forward socket"""
        do_ssl_handshake = kwargs.pop('do_ssl_handshake', False)
        local = self.connection
        remote = None
        self.send_response(200)
        self.end_headers()
        self.close_connection = 1
        data = local.recv(1024)
        if not data:
            local.close()
            return
        data_is_clienthello = is_clienthello(data)
        if data_is_clienthello:
            kwargs['client_hello'] = data
        max_retry = kwargs.get('max_retry', 3)
        for i in xrange(max_retry):
            try:
                if do_ssl_handshake:
                    remote = self.create_ssl_connection(hostname, port, timeout, **kwargs)
                else:
                    remote = self.create_tcp_connection(hostname, port, timeout, **kwargs)
                if not data_is_clienthello and remote and not isinstance(remote, Exception):
                    remote.sendall(data)
                break
            except StandardError as e:
                logging.exception('%s "FWD %s %s:%d %s" %r', self.address_string(), self.command, hostname, port, self.protocol_version, e)
                if hasattr(remote, 'close'):
                    remote.close()
                if i == max_retry - 1:
                    raise
        logging.info('%s "FWD %s %s:%d %s" - -', self.address_string(), self.command, hostname, port, self.protocol_version)
        if hasattr(remote, 'fileno'):
            # reset timeout default to avoid long http upload failure, but it will delay timeout retry :(
            remote.settimeout(None)
        del kwargs
        self.forward_socket(local, remote, self.max_timeout)
    def DIRECT(self, kwargs):
        method = self.command
        if 'url' in kwargs:
            url = kwargs.pop('url')
        elif self.path.lower().startswith(('http://', 'https://', 'ftp://')):
            url = self.path
        else:
            url = 'http://%s%s' % (self.headers['Host'], self.path)
        headers = {k.title(): v for k, v in self.headers.items()}
        body = self.body
        response = None
        try:
            response = self.create_http_request(method, url, headers, body, timeout=self.connect_timeout, **kwargs)
            logging.info('%s "DIRECT %s %s %s" %s %s', self.address_string(), self.command, url, self.protocol_version, response.status, response.getheader('Content-Length', '-'))
            response_headers = {k.title(): v for k, v in response.getheaders()}
            self.send_response(response.status)
            for key, value in response.getheaders():
                self.send_header(key, value)
            self.end_headers()
            if self.command == 'HEAD' or response.status in (204, 304):
                response.close()
                return
            need_chunked = 'Transfer-Encoding' in response_headers
            while True:
                data = response.read(8192)
                if not data:
                    if need_chunked:
                        self.wfile.write('0\r\n\r\n')
                    break
                if need_chunked:
                    self.wfile.write('%x\r\n' % len(data))
                self.wfile.write(data)
                if need_chunked:
                    self.wfile.write('\r\n')
                del data
        except (ssl.SSLError, socket.timeout, socket.error):
            if response:
                if response.fp and response.fp._sock:
                    response.fp._sock.close()
                response.close()
        finally:
            if response:
                response.close()
    def URLFETCH(self, fetchservers, max_retry=2, kwargs={}):
        """urlfetch from fetchserver"""
        method = self.command
        if self.path[0] == '/':
            url = '%s://%s%s' % (self.scheme, self.headers['Host'], self.path)
        elif self.path.lower().startswith(('http://', 'https://', 'ftp://')):
            url = self.path
        else:
            raise ValueError('URLFETCH %r is not a valid url' % self.path)
        headers = {k.title(): v for k, v in self.headers.items()}
        body = self.body
        response = None
        errors = []
        fetchserver = fetchservers[0]
        for i in xrange(max_retry):
            try:
                response = self.create_http_request_withserver(fetchserver, method, url, headers, body, timeout=60, **kwargs)
                if response.app_status < 400:
                    break
                else:
                    self.handle_urlfetch_error(fetchserver, response)
                    if i < max_retry - 1:
                        if len(fetchservers) > 1:
                            fetchserver = random.choice(fetchservers[1:])
                        logging.info('URLFETCH return %d, trying fetchserver=%r', response.app_status, fetchserver)
                        response.close()
            except StandardError as e:
                errors.append(e)
                logging.info('URLFETCH "%s %s" fetchserver=%r %r, retry...', method, url, fetchserver, e)
        if len(errors) == max_retry:
            if response and response.app_status >= 500:
                status = response.app_status
                headers = dict(response.getheaders())
                content = response.read()
                response.close()
            else:
                status = 502
                headers = {'Content-Type': 'text/html'}
                content = message_html('502 URLFetch failed', 'Local URLFetch %r failed' % url, '<br>'.join(repr(x) for x in errors))
            return self.MOCK(status, headers, content)
        logging.info('%s "URL %s %s %s" %s %s', self.address_string(), method, url, self.protocol_version, response.status, response.getheader('Content-Length', '-'))
        try:
            if response.status == 206:
                return RangeFetch(self, response, fetchservers, **kwargs).fetch()
            if response.app_header_parsed:
                self.close_connection = not response.getheader('Content-Length')
            self.send_response(response.status)
            for key, value in response.getheaders():
                if key.title() == 'Transfer-Encoding':
                    continue
                self.send_header(key, value)
            self.end_headers()
            bufsize = 8192
            while True:
                data = response.read(bufsize)
                if data:
                    self.wfile.write(data)
                if not data:
                    self.handle_urlfetch_response_close(fetchserver, response)
                    response.close()
                    break
                del data
        except NetWorkIOError as e:
            if e[0] in (errno.ECONNABORTED, errno.EPIPE) or 'bad write retry' in repr(e):
                return
    def do_METHOD(self):
        self.parse_header()
        self.body = self.rfile.read(int(self.headers['Content-Length'])) if 'Content-Length' in self.headers else ''
        if self.sticky_filter:
            action = self.sticky_filter.filter(self)
            if action:
                return action.pop(0)(*action)
        for handler_filter in self.handler_filters:
            action = handler_filter.filter(self)
            if action:
                return action.pop(0)(*action)
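# --- Editor's note: illustrative sketch, not part of upstream goagent. ---
# do_METHOD() parses the request, reads the body, then walks sticky_filter and
# handler_filters until one returns an action; with the default
# SimpleProxyHandlerFilter this yields a plain forwarding/direct proxy. The
# real server classes are defined later in this file; the stdlib
# ThreadingTCPServer below is only an assumption for illustration
# (hypothetical helper, never called here):
def _demo_run_simple_proxy(address=('127.0.0.1', 8087)):
    server = SocketServer.ThreadingTCPServer(address, SimpleProxyHandler)
    logging.info('demo proxy serving on %s:%d', address[0], address[1])
    server.serve_forever()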
  1179. class RangeFetch(object):
  1180. """Range Fetch Class"""
  1181. threads = 2
  1182. maxsize = 1024*1024*4
  1183. bufsize = 8192
  1184. waitsize = 1024*512
  1185. def __init__(self, handler, response, fetchservers, **kwargs):
  1186. self.handler = handler
  1187. self.url = handler.path
  1188. self.response = response
  1189. self.fetchservers = fetchservers
  1190. self.kwargs = kwargs
  1191. self._stopped = None
  1192. self._last_app_status = {}
  1193. self.expect_begin = 0
  1194. def fetch(self):
  1195. response_status = self.response.status
  1196. response_headers = dict((k.title(), v) for k, v in self.response.getheaders())
  1197. content_range = response_headers['Content-Range']
  1198. #content_length = response_headers['Content-Length']
  1199. start, end, length = tuple(int(x) for x in re.search(r'bytes (\d+)-(\d+)/(\d+)', content_range).group(1, 2, 3))
  1200. if start == 0:
  1201. response_status = 200
  1202. response_headers['Content-Length'] = str(length)
  1203. del response_headers['Content-Range']
  1204. else:
  1205. response_headers['Content-Range'] = 'bytes %s-%s/%s' % (start, end, length)
  1206. response_headers['Content-Length'] = str(length-start)
  1207. logging.info('>>>>>>>>>>>>>>> RangeFetch started(%r) %d-%d', self.url, start, end)
  1208. self.handler.send_response(response_status)
  1209. for key, value in response_headers.items():
  1210. self.handler.send_header(key, value)
  1211. self.handler.end_headers()
  1212. data_queue = Queue.PriorityQueue()
  1213. range_queue = Queue.PriorityQueue()
  1214. range_queue.put((start, end, self.response))
  1215. self.expect_begin = start
  1216. for begin in range(end+1, length, self.maxsize):
  1217. range_queue.put((begin, min(begin+self.maxsize-1, length-1), None))
  1218. for i in xrange(0, self.threads):
  1219. range_delay_size = i * self.maxsize
  1220. spawn_later(float(range_delay_size)/self.waitsize, self.__fetchlet, range_queue, data_queue, range_delay_size)
  1221. has_peek = hasattr(data_queue, 'peek')
  1222. peek_timeout = 120
  1223. while self.expect_begin < length - 1:
  1224. try:
  1225. if has_peek:
  1226. begin, data = data_queue.peek(timeout=peek_timeout)
  1227. if self.expect_begin == begin:
  1228. data_queue.get()
  1229. elif self.expect_begin < begin:
  1230. time.sleep(0.1)
  1231. continue
  1232. else:
  1233. logging.error('RangeFetch Error: begin(%r) < expect_begin(%r), quit.', begin, self.expect_begin)
  1234. break
  1235. else:
  1236. begin, data = data_queue.get(timeout=peek_timeout)
  1237. if self.expect_begin == begin:
  1238. pass
  1239. elif self.expect_begin < begin:
  1240. data_queue.put((begin, data))
  1241. time.sleep(0.1)
  1242. continue
  1243. else:
  1244. logging.error('RangeFetch Error: begin(%r) < expect_begin(%r), quit.', begin, self.expect_begin)
  1245. break
  1246. except Queue.Empty:
  1247. logging.error('data_queue peek timeout, break')
  1248. break
  1249. try:
  1250. self.handler.wfile.write(data)
  1251. self.expect_begin += len(data)
  1252. del data
  1253. except StandardError as e:
  1254. logging.info('RangeFetch client connection aborted(%s).', e)
  1255. break
  1256. self._stopped = True
  1257. def __fetchlet(self, range_queue, data_queue, range_delay_size):
  1258. headers = dict((k.title(), v) for k, v in self.handler.headers.items())
  1259. headers['Connection'] = 'close'
  1260. while 1:
  1261. try:
  1262. if self._stopped:
  1263. return
  1264. try:
  1265. start, end, response = range_queue.get(timeout=1)
  1266. if self.expect_begin < start and data_queue.qsize() * self.bufsize + range_delay_size > 30*1024*1024:
  1267. range_queue.put((start, end, response))
  1268. time.sleep(10)
  1269. continue
  1270. headers['Range'] = 'bytes=%d-%d' % (start, end)
  1271. fetchserver = ''
  1272. if not response:
  1273. fetchserver = random.choice(self.fetchservers)
  1274. if self._last_app_status.get(fetchserver, 200) >= 500:
  1275. time.sleep(5)
  1276. response = self.handler.create_http_request_withserver(fetchserver, self.handler.command, self.url, headers, self.handler.body, timeout=self.handler.connect_timeout, **self.kwargs)
  1277. except Queue.Empty:
  1278. continue
  1279. except StandardError as e:
1280. logging.warning("request in __fetchlet failed: %r", e)
  1281. range_queue.put((start, end, None))
  1282. continue
  1283. if not response:
  1284. logging.warning('RangeFetch %s return %r', headers['Range'], response)
  1285. range_queue.put((start, end, None))
  1286. continue
  1287. if fetchserver:
  1288. self._last_app_status[fetchserver] = response.app_status
  1289. if response.app_status != 200:
  1290. logging.warning('Range Fetch "%s %s" %s return %s', self.handler.command, self.url, headers['Range'], response.app_status)
  1291. response.close()
  1292. range_queue.put((start, end, None))
  1293. continue
  1294. if response.getheader('Location'):
  1295. self.url = urlparse.urljoin(self.url, response.getheader('Location'))
  1296. logging.info('RangeFetch Redirect(%r)', self.url)
  1297. response.close()
  1298. range_queue.put((start, end, None))
  1299. continue
  1300. if 200 <= response.status < 300:
  1301. content_range = response.getheader('Content-Range')
  1302. if not content_range:
  1303. logging.warning('RangeFetch "%s %s" return Content-Range=%r: response headers=%r', self.handler.command, self.url, content_range, response.getheaders())
  1304. response.close()
  1305. range_queue.put((start, end, None))
  1306. continue
  1307. content_length = int(response.getheader('Content-Length', 0))
  1308. logging.info('>>>>>>>>>>>>>>> [thread %s] %s %s', threading.currentThread().ident, content_length, content_range)
  1309. while 1:
  1310. try:
  1311. if self._stopped:
  1312. response.close()
  1313. return
  1314. data = response.read(self.bufsize)
  1315. if not data:
  1316. break
  1317. data_queue.put((start, data))
  1318. start += len(data)
  1319. except StandardError as e:
  1320. logging.warning('RangeFetch "%s %s" %s failed: %s', self.handler.command, self.url, headers['Range'], e)
  1321. break
  1322. if start < end + 1:
  1323. logging.warning('RangeFetch "%s %s" retry %s-%s', self.handler.command, self.url, start, end)
  1324. response.close()
  1325. range_queue.put((start, end, None))
  1326. continue
  1327. logging.info('>>>>>>>>>>>>>>> Successfully reached %d bytes.', start - 1)
  1328. else:
  1329. logging.error('RangeFetch %r return %s', self.url, response.status)
  1330. response.close()
  1331. range_queue.put((start, end, None))
  1332. continue
  1333. except StandardError as e:
  1334. logging.exception('RangeFetch._fetchlet error:%s', e)
  1335. raise
  1336. class AdvancedProxyHandler(SimpleProxyHandler):
  1337. """Advanced Proxy Handler"""
  1338. dns_cache = LRUCache(64*1024)
  1339. dns_servers = []
  1340. dns_blacklist = []
  1341. tcp_connection_time = collections.defaultdict(float)
  1342. tcp_connection_time_with_clienthello = collections.defaultdict(float)
  1343. tcp_connection_cache = collections.defaultdict(Queue.PriorityQueue)
  1344. ssl_connection_time = collections.defaultdict(float)
  1345. ssl_connection_cache = collections.defaultdict(Queue.PriorityQueue)
  1346. ssl_connection_keepalive = False
  1347. max_window = 4
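# These class-level tables are shared across handler instances: the *_connection_time
# dicts rank candidate IPs by measured connect/handshake latency, and the
# *_connection_cache queues keep warm sockets (keyed by "hostname:port") for reuse.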
  1348. def gethostbyname2(self, hostname):
  1349. try:
  1350. iplist = self.dns_cache[hostname]
  1351. except KeyError:
  1352. if re.match(r'^\d+\.\d+\.\d+\.\d+$', hostname) or ':' in hostname:
  1353. iplist = [hostname]
  1354. elif self.dns_servers:
  1355. try:
  1356. record = dnslib_resolve_over_udp(hostname, self.dns_servers, timeout=2, blacklist=self.dns_blacklist)
  1357. except socket.gaierror:
  1358. record = dnslib_resolve_over_tcp(hostname, self.dns_servers, timeout=2, blacklist=self.dns_blacklist)
  1359. iplist = dnslib_record2iplist(record)
  1360. else:
  1361. iplist = socket.gethostbyname_ex(hostname)[-1]
  1362. self.dns_cache[hostname] = iplist
  1363. return iplist
  1364. def create_tcp_connection(self, hostname, port, timeout, **kwargs):
  1365. client_hello = kwargs.get('client_hello', None)
  1366. cache_key = kwargs.get('cache_key') if not client_hello else None
  1367. def create_connection(ipaddr, timeout, queobj):
  1368. sock = None
  1369. try:
1370. # create an ipv4/ipv6 socket object
1371. sock = socket.socket(socket.AF_INET if ':' not in ipaddr[0] else socket.AF_INET6)
1372. # set the reuseaddr option to avoid the 10048 socket error
1373. sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1374. # resize socket recv buffer 8K->32K to improve browser-related application performance
1375. sock.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, 32*1024)
1376. # disable Nagle's algorithm to send http requests quickly.
  1377. sock.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, True)
  1378. # set a short timeout to trigger timeout retry more quickly.
  1379. sock.settimeout(timeout or self.connect_timeout)
  1380. # start connection time record
  1381. start_time = time.time()
  1382. # TCP connect
  1383. sock.connect(ipaddr)
  1384. # record TCP connection time
  1385. self.tcp_connection_time[ipaddr] = time.time() - start_time
  1386. # send client hello and peek server hello
  1387. if client_hello:
  1388. sock.sendall(client_hello)
  1389. if hasattr(socket, 'MSG_PEEK'):
  1390. peek_data = sock.recv(1, socket.MSG_PEEK)
  1391. if not peek_data:
  1392. logging.debug('create_tcp_connection %r with client_hello return NULL byte, continue %r', ipaddr, time.time()-start_time)
  1393. raise socket.timeout('timed out')
  1394. # record TCP connection time with client hello
  1395. self.tcp_connection_time_with_clienthello[ipaddr] = time.time() - start_time
  1396. # put tcp socket object to output queobj
  1397. queobj.put(sock)
  1398. except (socket.error, OSError) as e:
1399. # on any socket.error, put the Exception into the output queobj.
  1400. queobj.put(e)
  1401. # reset a large and random timeout to the ipaddr
  1402. self.tcp_connection_time[ipaddr] = self.connect_timeout+random.random()
  1403. # close tcp socket
  1404. if sock:
  1405. sock.close()
  1406. def close_connection(count, queobj, first_tcp_time):
  1407. for _ in range(count):
  1408. sock = queobj.get()
  1409. tcp_time_threshold = min(1, 1.3 * first_tcp_time)
  1410. if sock and not isinstance(sock, Exception):
  1411. ipaddr = sock.getpeername()
  1412. if cache_key and self.tcp_connection_time[ipaddr] < tcp_time_threshold:
  1413. cache_queue = self.tcp_connection_cache[cache_key]
  1414. if cache_queue.qsize() < 8:
  1415. try:
  1416. _, old_sock = cache_queue.get_nowait()
  1417. old_sock.close()
  1418. except Queue.Empty:
  1419. pass
  1420. cache_queue.put((time.time(), sock))
  1421. else:
  1422. sock.close()
  1423. try:
  1424. while cache_key:
  1425. ctime, sock = self.tcp_connection_cache[cache_key].get_nowait()
  1426. if time.time() - ctime < 30:
  1427. return sock
  1428. else:
  1429. sock.close()
  1430. except Queue.Empty:
  1431. pass
  1432. addresses = [(x, port) for x in self.gethostbyname2(hostname)]
  1433. sock = None
  1434. for _ in range(kwargs.get('max_retry', 3)):
  1435. window = min((self.max_window+1)//2, len(addresses))
  1436. if client_hello:
  1437. addresses.sort(key=self.tcp_connection_time_with_clienthello.__getitem__)
  1438. else:
  1439. addresses.sort(key=self.tcp_connection_time.__getitem__)
  1440. addrs = addresses[:window] + random.sample(addresses, window)
  1441. queobj = gevent.queue.Queue() if gevent else Queue.Queue()
  1442. for addr in addrs:
  1443. thread.start_new_thread(create_connection, (addr, timeout, queobj))
  1444. for i in range(len(addrs)):
  1445. sock = queobj.get()
  1446. if not isinstance(sock, Exception):
  1447. first_tcp_time = self.tcp_connection_time[sock.getpeername()] if not cache_key else 0
  1448. thread.start_new_thread(close_connection, (len(addrs)-i-1, queobj, first_tcp_time))
  1449. return sock
  1450. elif i == 0:
  1451. # only output first error
  1452. logging.warning('create_tcp_connection to %r with %s return %r, try again.', hostname, addrs, sock)
  1453. if isinstance(sock, Exception):
  1454. raise sock
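# Minimal sketch of the connection race implemented above (illustrative only, with a
# hypothetical dial() helper that returns either a socket or an Exception -- the real
# code uses create_connection/close_connection):
#
#     def race_connect(addrs, dial, timeout):
#         queobj = Queue.Queue()
#         for addr in addrs:
#             thread.start_new_thread(lambda a: queobj.put(dial(a, timeout)), (addr,))
#         for _ in addrs:
#             result = queobj.get()
#             if not isinstance(result, Exception):
#                 return result  # first successful socket wins; stragglers get cached or closed
#         raise result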
  1455. def create_ssl_connection(self, hostname, port, timeout, **kwargs):
  1456. cache_key = kwargs.get('cache_key')
  1457. validate = kwargs.get('validate')
  1458. def create_connection(ipaddr, timeout, queobj):
  1459. sock = None
  1460. ssl_sock = None
  1461. try:
1462. # create an ipv4/ipv6 socket object
1463. sock = socket.socket(socket.AF_INET if ':' not in ipaddr[0] else socket.AF_INET6)
1464. # set the reuseaddr option to avoid the 10048 socket error
1465. sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1466. # resize socket recv buffer 8K->32K to improve browser-related application performance
1467. sock.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, 32*1024)
1468. # disable Nagle's algorithm to send http requests quickly.
  1469. sock.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, True)
  1470. # set a short timeout to trigger timeout retry more quickly.
  1471. sock.settimeout(timeout or self.connect_timeout)
  1472. # pick up the certificate
  1473. if not validate:
  1474. ssl_sock = ssl.wrap_socket(sock, ssl_version=self.ssl_version, do_handshake_on_connect=False)
  1475. else:
  1476. ssl_sock = ssl.wrap_socket(sock, ssl_version=self.ssl_version, cert_reqs=ssl.CERT_REQUIRED, ca_certs=os.path.join(os.path.dirname(os.path.abspath(__file__)), 'cacert.pem'), do_handshake_on_connect=False)
  1477. ssl_sock.settimeout(timeout or self.connect_timeout)
  1478. # start connection time record
  1479. start_time = time.time()
  1480. # TCP connect
  1481. ssl_sock.connect(ipaddr)
  1482. connected_time = time.time()
  1483. # SSL handshake
  1484. ssl_sock.do_handshake()
  1485. handshaked_time = time.time()
  1486. # record TCP connection time
  1487. self.tcp_connection_time[ipaddr] = ssl_sock.tcp_time = connected_time - start_time
  1488. # record SSL connection time
  1489. self.ssl_connection_time[ipaddr] = ssl_sock.ssl_time = handshaked_time - start_time
  1490. ssl_sock.ssl_time = connected_time - start_time
1491. # sometimes we want to use the raw tcp socket directly (select/epoll), so attach it to the ssl socket.
  1492. ssl_sock.sock = sock
  1493. # verify SSL certificate.
  1494. if validate and hostname.endswith('.appspot.com'):
  1495. cert = ssl_sock.getpeercert()
  1496. orgname = next((v for ((k, v),) in cert['subject'] if k == 'organizationName'))
  1497. if not orgname.lower().startswith('google '):
  1498. raise ssl.SSLError("%r certificate organizationName(%r) not startswith 'Google'" % (hostname, orgname))
  1499. # put ssl socket object to output queobj
  1500. queobj.put(ssl_sock)
  1501. except (socket.error, ssl.SSLError, OSError) as e:
1502. # on any socket.error, put the Exception into the output queobj.
  1503. queobj.put(e)
  1504. # reset a large and random timeout to the ipaddr
  1505. self.ssl_connection_time[ipaddr] = self.connect_timeout + random.random()
  1506. # close ssl socket
  1507. if ssl_sock:
  1508. ssl_sock.close()
  1509. # close tcp socket
  1510. if sock:
  1511. sock.close()
  1512. def create_connection_withopenssl(ipaddr, timeout, queobj):
  1513. sock = None
  1514. ssl_sock = None
  1515. try:
1516. # create an ipv4/ipv6 socket object
1517. sock = socket.socket(socket.AF_INET if ':' not in ipaddr[0] else socket.AF_INET6)
1518. # set the reuseaddr option to avoid the 10048 socket error
1519. sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1520. # resize socket recv buffer 8K->32K to improve browser-related application performance
1521. sock.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, 32*1024)
1522. # disable Nagle's algorithm to send http requests quickly.
  1523. sock.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, True)
  1524. # set a short timeout to trigger timeout retry more quickly.
  1525. sock.settimeout(timeout or self.connect_timeout)
  1526. # pick up the certificate
  1527. server_hostname = b'www.google.com' if hostname.endswith('.appspot.com') else None
  1528. ssl_sock = SSLConnection(self.openssl_context, sock)
  1529. ssl_sock.set_connect_state()
  1530. if server_hostname:
  1531. ssl_sock.set_tlsext_host_name(server_hostname)
  1532. # start connection time record
  1533. start_time = time.time()
  1534. # TCP connect
  1535. ssl_sock.connect(ipaddr)
  1536. connected_time = time.time()
  1537. # SSL handshake
  1538. ssl_sock.do_handshake()
  1539. handshaked_time = time.time()
  1540. # record TCP connection time
  1541. self.tcp_connection_time[ipaddr] = ssl_sock.tcp_time = connected_time - start_time
  1542. # record SSL connection time
  1543. self.ssl_connection_time[ipaddr] = ssl_sock.ssl_time = handshaked_time - start_time
1544. # sometimes we want to use the raw tcp socket directly (select/epoll), so attach it to the ssl socket.
  1545. ssl_sock.sock = sock
  1546. # verify SSL certificate.
  1547. if validate and hostname.endswith('.appspot.com'):
  1548. cert = ssl_sock.get_peer_certificate()
  1549. commonname = next((v for k, v in cert.get_subject().get_components() if k == 'CN'))
  1550. if '.google' not in commonname and not commonname.endswith('.appspot.com'):
  1551. raise socket.error("Host name '%s' doesn't match certificate host '%s'" % (hostname, commonname))
  1552. # put ssl socket object to output queobj
  1553. queobj.put(ssl_sock)
  1554. except (socket.error, OpenSSL.SSL.Error, OSError) as e:
1555. # on any socket.error, put the Exception into the output queobj.
  1556. queobj.put(e)
  1557. # reset a large and random timeout to the ipaddr
  1558. self.ssl_connection_time[ipaddr] = self.connect_timeout + random.random()
  1559. # close ssl socket
  1560. if ssl_sock:
  1561. ssl_sock.close()
  1562. # close tcp socket
  1563. if sock:
  1564. sock.close()
  1565. def close_connection(count, queobj, first_tcp_time, first_ssl_time):
  1566. for _ in range(count):
  1567. sock = queobj.get()
  1568. ssl_time_threshold = min(1, 1.3 * first_ssl_time)
  1569. if sock and not isinstance(sock, Exception):
  1570. if cache_key and sock.ssl_time < ssl_time_threshold:
  1571. cache_queue = self.ssl_connection_cache[cache_key]
  1572. if cache_queue.qsize() < 8:
  1573. try:
  1574. _, old_sock = cache_queue.get_nowait()
  1575. old_sock.close()
  1576. except Queue.Empty:
  1577. pass
  1578. cache_queue.put((time.time(), sock))
  1579. else:
  1580. sock.close()
  1581. try:
  1582. while cache_key:
  1583. ctime, sock = self.ssl_connection_cache[cache_key].get_nowait()
  1584. if time.time() - ctime < 30:
  1585. return sock
  1586. else:
  1587. sock.close()
  1588. except Queue.Empty:
  1589. pass
  1590. addresses = [(x, port) for x in self.gethostbyname2(hostname)]
  1591. sock = None
  1592. for _ in range(kwargs.get('max_retry', 3)):
  1593. window = min((self.max_window+1)//2, len(addresses))
  1594. addresses.sort(key=self.ssl_connection_time.__getitem__)
  1595. addrs = addresses[:window] + random.sample(addresses, window)
  1596. queobj = gevent.queue.Queue() if gevent else Queue.Queue()
  1597. for addr in addrs:
  1598. thread.start_new_thread(create_connection, (addr, timeout, queobj))
  1599. for i in range(len(addrs)):
  1600. sock = queobj.get()
  1601. if not isinstance(sock, Exception):
  1602. thread.start_new_thread(close_connection, (len(addrs)-i-1, queobj, sock.tcp_time, sock.ssl_time))
  1603. return sock
  1604. elif i == 0:
  1605. # only output first error
  1606. logging.warning('create_ssl_connection to %r with %s return %r, try again.', hostname, addrs, sock)
  1607. if isinstance(sock, Exception):
  1608. raise sock
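# Note: the SSL race above reuses the same pattern as create_tcp_connection() and
# dials with create_connection() (stdlib ssl); create_connection_withopenssl() is an
# alternative pyOpenSSL dialer with SNI support that is not wired into the race in
# this code path.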
  1609. def create_http_request(self, method, url, headers, body, timeout, max_retry=2, bufsize=8192, crlf=None, validate=None, cache_key=None):
  1610. scheme, netloc, path, query, _ = urlparse.urlsplit(url)
  1611. if netloc.rfind(':') <= netloc.rfind(']'):
  1612. # no port number
  1613. host = netloc
  1614. port = 443 if scheme == 'https' else 80
  1615. else:
  1616. host, _, port = netloc.rpartition(':')
  1617. port = int(port)
  1618. if query:
  1619. path += '?' + query
  1620. if 'Host' not in headers:
  1621. headers['Host'] = host
  1622. if body and 'Content-Length' not in headers:
  1623. headers['Content-Length'] = str(len(body))
  1624. sock = None
  1625. for i in range(max_retry):
  1626. try:
  1627. create_connection = self.create_ssl_connection if scheme == 'https' else self.create_tcp_connection
  1628. sock = create_connection(host, port, timeout, validate=validate, cache_key=cache_key)
  1629. break
  1630. except StandardError as e:
  1631. logging.exception('create_http_request "%s %s" failed:%s', method, url, e)
  1632. if sock:
  1633. sock.close()
  1634. if i == max_retry - 1:
  1635. raise
  1636. request_data = ''
  1637. crlf_counter = 0
  1638. if scheme != 'https' and crlf:
  1639. fakeheaders = dict((k.title(), v) for k, v in headers.items())
  1640. fakeheaders.pop('Content-Length', None)
  1641. fakeheaders.pop('Cookie', None)
  1642. fakeheaders.pop('Host', None)
  1643. if 'User-Agent' not in fakeheaders:
  1644. fakeheaders['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1878.0 Safari/537.36'
  1645. if 'Accept-Language' not in fakeheaders:
  1646. fakeheaders['Accept-Language'] = 'zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.4'
  1647. if 'Accept' not in fakeheaders:
  1648. fakeheaders['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
  1649. fakeheaders_data = ''.join('%s: %s\r\n' % (k, v) for k, v in fakeheaders.items() if k not in self.skip_headers)
  1650. while crlf_counter < 5 or len(request_data) < 1500 * 2:
  1651. request_data += 'GET / HTTP/1.1\r\n%s\r\n' % fakeheaders_data
  1652. crlf_counter += 1
  1653. request_data += '\r\n\r\n\r\n'
  1654. request_data += '%s %s %s\r\n' % (method, path, self.protocol_version)
  1655. request_data += ''.join('%s: %s\r\n' % (k.title(), v) for k, v in headers.items() if k.title() not in self.skip_headers)
  1656. request_data += '\r\n'
  1657. if isinstance(body, bytes):
  1658. sock.sendall(request_data.encode() + body)
  1659. elif hasattr(body, 'read'):
  1660. sock.sendall(request_data)
  1661. while 1:
  1662. data = body.read(bufsize)
  1663. if not data:
  1664. break
  1665. sock.sendall(data)
  1666. else:
  1667. raise TypeError('create_http_request(body) must be a string or buffer, not %r' % type(body))
  1668. response = None
  1669. try:
  1670. while crlf_counter:
  1671. response = httplib.HTTPResponse(sock, buffering=False)
  1672. response.begin()
  1673. response.read()
  1674. response.close()
  1675. crlf_counter -= 1
  1676. except StandardError as e:
  1677. logging.exception('crlf skip read host=%r path=%r error: %r', headers.get('Host'), path, e)
  1678. if response:
  1679. if response.fp and response.fp._sock:
  1680. response.fp._sock.close()
  1681. response.close()
  1682. if sock:
  1683. sock.close()
  1684. return None
  1685. response = httplib.HTTPResponse(sock, buffering=True)
  1686. response.begin()
  1687. if self.ssl_connection_keepalive and scheme == 'https' and cache_key:
  1688. response.cache_key = cache_key
  1689. response.cache_sock = response.fp._sock
  1690. return response
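# Illustrative call shape (the real callers are the DIRECT/URLFETCH actions):
#     response = self.create_http_request('GET', 'http://www.example.com/',
#                                         {'Host': 'www.example.com'}, b'', timeout=4)
#     data = response.read()
# With crlf=True, several throwaway "GET / HTTP/1.1" requests are sent first and their
# responses are drained before the real response is parsed.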
  1691. def handle_urlfetch_response_close(self, fetchserver, response):
  1692. cache_sock = getattr(response, 'cache_sock', None)
  1693. if cache_sock:
  1694. if self.scheme == 'https':
  1695. self.ssl_connection_cache[response.cache_key].put((time.time(), cache_sock))
  1696. else:
  1697. cache_sock.close()
  1698. del response.cache_sock
  1699. def handle_urlfetch_error(self, fetchserver, response):
  1700. pass
  1701. class Common(object):
  1702. """Global Config Object"""
  1703. ENV_CONFIG_PREFIX = 'GOAGENT_'
  1704. def __init__(self):
  1705. """load config from proxy.ini"""
  1706. ConfigParser.RawConfigParser.OPTCRE = re.compile(r'(?P<option>[^=\s][^=]*)\s*(?P<vi>[=])\s*(?P<value>.*)$')
  1707. self.CONFIG = ConfigParser.ConfigParser()
  1708. self.CONFIG_FILENAME = os.path.splitext(os.path.abspath(__file__))[0]+'.ini'
  1709. self.CONFIG_USER_FILENAME = re.sub(r'\.ini$', '.user.ini', self.CONFIG_FILENAME)
  1710. self.CONFIG.read([self.CONFIG_FILENAME, self.CONFIG_USER_FILENAME])
  1711. for key, value in os.environ.items():
  1712. m = re.match(r'^%s([A-Z]+)_([A-Z\_\-]+)$' % self.ENV_CONFIG_PREFIX, key)
  1713. if m:
  1714. self.CONFIG.set(m.group(1).lower(), m.group(2).lower(), value)
  1715. self.LISTEN_IP = self.CONFIG.get('listen', 'ip')
  1716. self.LISTEN_PORT = self.CONFIG.getint('listen', 'port')
  1717. self.LISTEN_USERNAME = self.CONFIG.get('listen', 'username') if self.CONFIG.has_option('listen', 'username') else ''
  1718. self.LISTEN_PASSWORD = self.CONFIG.get('listen', 'password') if self.CONFIG.has_option('listen', 'password') else ''
  1719. self.LISTEN_VISIBLE = self.CONFIG.getint('listen', 'visible')
  1720. self.LISTEN_DEBUGINFO = self.CONFIG.getint('listen', 'debuginfo')
  1721. self.GAE_APPIDS = re.findall(r'[\w\-\.]+', self.CONFIG.get('gae', 'appid').replace('.appspot.com', ''))
  1722. self.GAE_PASSWORD = self.CONFIG.get('gae', 'password').strip()
  1723. self.GAE_PATH = self.CONFIG.get('gae', 'path')
  1724. self.GAE_MODE = self.CONFIG.get('gae', 'mode')
  1725. self.GAE_PROFILE = self.CONFIG.get('gae', 'profile').strip()
  1726. self.GAE_WINDOW = self.CONFIG.getint('gae', 'window')
  1727. self.GAE_KEEPALIVE = self.CONFIG.getint('gae', 'keepalive') if self.CONFIG.has_option('gae', 'keepalive') else 0
  1728. self.GAE_OBFUSCATE = self.CONFIG.getint('gae', 'obfuscate')
  1729. self.GAE_VALIDATE = self.CONFIG.getint('gae', 'validate')
  1730. self.GAE_TRANSPORT = self.CONFIG.getint('gae', 'transport') if self.CONFIG.has_option('gae', 'transport') else 0
  1731. self.GAE_OPTIONS = self.CONFIG.get('gae', 'options')
  1732. self.GAE_REGIONS = set(x.upper() for x in self.CONFIG.get('gae', 'regions').split('|') if x.strip())
  1733. if self.GAE_PROFILE == 'auto':
  1734. try:
  1735. socket.create_connection(('2001:4860:4860::8888', 53), timeout=1).close()
  1736. logging.info('Use profile ipv6')
  1737. self.GAE_PROFILE = 'ipv6'
  1738. except socket.error as e:
1739. logging.info('Profile ipv6 test failed (%r), falling back to ipv4', e)
  1740. self.GAE_PROFILE = 'ipv4'
  1741. hosts_section, http_section = '%s/hosts' % self.GAE_PROFILE, '%s/http' % self.GAE_PROFILE
  1742. if 'USERDNSDOMAIN' in os.environ and re.match(r'^\w+\.\w+$', os.environ['USERDNSDOMAIN']):
  1743. self.CONFIG.set(hosts_section, '.' + os.environ['USERDNSDOMAIN'], '')
  1744. self.HOST_MAP = collections.OrderedDict((k, v or k) for k, v in self.CONFIG.items(hosts_section) if '\\' not in k and ':' not in k and not k.startswith('.'))
  1745. self.HOST_POSTFIX_MAP = collections.OrderedDict((k, v) for k, v in self.CONFIG.items(hosts_section) if '\\' not in k and ':' not in k and k.startswith('.'))
  1746. self.HOST_POSTFIX_ENDSWITH = tuple(self.HOST_POSTFIX_MAP)
  1747. self.HOSTPORT_MAP = collections.OrderedDict((k, v) for k, v in self.CONFIG.items(hosts_section) if ':' in k and not k.startswith('.'))
  1748. self.HOSTPORT_POSTFIX_MAP = collections.OrderedDict((k, v) for k, v in self.CONFIG.items(hosts_section) if ':' in k and k.startswith('.'))
  1749. self.HOSTPORT_POSTFIX_ENDSWITH = tuple(self.HOSTPORT_POSTFIX_MAP)
  1750. self.URLRE_MAP = collections.OrderedDict((re.compile(k).match, v) for k, v in self.CONFIG.items(hosts_section) if '\\' in k)
  1751. self.HTTP_WITHGAE = set(self.CONFIG.get(http_section, 'withgae').split('|'))
  1752. self.HTTP_CRLFSITES = tuple(self.CONFIG.get(http_section, 'crlfsites').split('|'))
  1753. self.HTTP_FORCEHTTPS = set(self.CONFIG.get(http_section, 'forcehttps').split('|'))
  1754. self.HTTP_FAKEHTTPS = set(self.CONFIG.get(http_section, 'fakehttps').split('|'))
  1755. self.HTTP_DNS = self.CONFIG.get(http_section, 'dns').split('|') if self.CONFIG.has_option(http_section, 'dns') else []
  1756. self.IPLIST_MAP = collections.OrderedDict((k, v.split('|')) for k, v in self.CONFIG.items('iplist'))
  1757. self.IPLIST_MAP.update((k, [k]) for k, v in self.HOST_MAP.items() if k == v)
  1758. self.PAC_ENABLE = self.CONFIG.getint('pac', 'enable')
  1759. self.PAC_IP = self.CONFIG.get('pac', 'ip')
  1760. self.PAC_PORT = self.CONFIG.getint('pac', 'port')
  1761. self.PAC_FILE = self.CONFIG.get('pac', 'file').lstrip('/')
  1762. self.PAC_GFWLIST = self.CONFIG.get('pac', 'gfwlist')
  1763. self.PAC_ADBLOCK = self.CONFIG.get('pac', 'adblock')
  1764. self.PAC_ADMODE = self.CONFIG.getint('pac', 'admode')
  1765. self.PAC_EXPIRED = self.CONFIG.getint('pac', 'expired')
  1766. self.PHP_ENABLE = self.CONFIG.getint('php', 'enable')
  1767. self.PHP_LISTEN = self.CONFIG.get('php', 'listen')
  1768. self.PHP_PASSWORD = self.CONFIG.get('php', 'password') if self.CONFIG.has_option('php', 'password') else ''
  1769. self.PHP_CRLF = self.CONFIG.getint('php', 'crlf') if self.CONFIG.has_option('php', 'crlf') else 1
  1770. self.PHP_VALIDATE = self.CONFIG.getint('php', 'validate') if self.CONFIG.has_option('php', 'validate') else 0
  1771. self.PHP_FETCHSERVER = self.CONFIG.get('php', 'fetchserver')
  1772. self.PHP_USEHOSTS = self.CONFIG.getint('php', 'usehosts')
  1773. self.PROXY_ENABLE = self.CONFIG.getint('proxy', 'enable')
  1774. self.PROXY_AUTODETECT = self.CONFIG.getint('proxy', 'autodetect') if self.CONFIG.has_option('proxy', 'autodetect') else 0
  1775. self.PROXY_HOST = self.CONFIG.get('proxy', 'host')
  1776. self.PROXY_PORT = self.CONFIG.getint('proxy', 'port')
  1777. self.PROXY_USERNAME = self.CONFIG.get('proxy', 'username')
  1778. self.PROXY_PASSWROD = self.CONFIG.get('proxy', 'password')
  1779. if not self.PROXY_ENABLE and self.PROXY_AUTODETECT:
  1780. system_proxy = ProxyUtil.get_system_proxy()
  1781. if system_proxy and self.LISTEN_IP not in system_proxy:
  1782. _, username, password, address = ProxyUtil.parse_proxy(system_proxy)
  1783. proxyhost, _, proxyport = address.rpartition(':')
  1784. self.PROXY_ENABLE = 1
  1785. self.PROXY_USERNAME = username
  1786. self.PROXY_PASSWROD = password
  1787. self.PROXY_HOST = proxyhost
  1788. self.PROXY_PORT = int(proxyport)
  1789. if self.PROXY_ENABLE:
  1790. self.GAE_MODE = 'https'
  1791. self.AUTORANGE_HOSTS = self.CONFIG.get('autorange', 'hosts').split('|')
  1792. self.AUTORANGE_HOSTS_MATCH = [re.compile(fnmatch.translate(h)).match for h in self.AUTORANGE_HOSTS]
  1793. self.AUTORANGE_ENDSWITH = tuple(self.CONFIG.get('autorange', 'endswith').split('|'))
  1794. self.AUTORANGE_NOENDSWITH = tuple(self.CONFIG.get('autorange', 'noendswith').split('|'))
  1795. self.AUTORANGE_MAXSIZE = self.CONFIG.getint('autorange', 'maxsize')
  1796. self.AUTORANGE_WAITSIZE = self.CONFIG.getint('autorange', 'waitsize')
  1797. self.AUTORANGE_BUFSIZE = self.CONFIG.getint('autorange', 'bufsize')
  1798. self.AUTORANGE_THREADS = self.CONFIG.getint('autorange', 'threads')
  1799. self.FETCHMAX_LOCAL = self.CONFIG.getint('fetchmax', 'local') if self.CONFIG.get('fetchmax', 'local') else 3
  1800. self.FETCHMAX_SERVER = self.CONFIG.get('fetchmax', 'server')
  1801. self.DNS_ENABLE = self.CONFIG.getint('dns', 'enable')
  1802. self.DNS_LISTEN = self.CONFIG.get('dns', 'listen')
  1803. self.DNS_SERVERS = self.HTTP_DNS or self.CONFIG.get('dns', 'servers').split('|')
  1804. self.DNS_BLACKLIST = set(self.CONFIG.get('dns', 'blacklist').split('|'))
  1805. self.DNS_TCPOVER = tuple(self.CONFIG.get('dns', 'tcpover').split('|'))
  1806. self.USERAGENT_ENABLE = self.CONFIG.getint('useragent', 'enable')
  1807. self.USERAGENT_STRING = self.CONFIG.get('useragent', 'string')
  1808. self.LOVE_ENABLE = self.CONFIG.getint('love', 'enable')
  1809. self.LOVE_TIP = self.CONFIG.get('love', 'tip').encode('utf8').decode('unicode-escape').split('|')
  1810. def resolve_iplist(self):
  1811. def do_resolve(host, dnsservers, queue):
  1812. try:
  1813. iplist = dnslib_record2iplist(dnslib_resolve_over_udp(host, dnsservers, timeout=2, blacklist=self.DNS_BLACKLIST))
  1814. queue.put((host, dnsservers, iplist or []))
  1815. except (socket.error, OSError) as e:
  1816. logging.warning('resolve remote host=%r failed: %s', host, e)
  1817. queue.put((host, dnsservers, []))
  1818. # https://support.google.com/websearch/answer/186669?hl=zh-Hans
  1819. google_blacklist = ['216.239.32.20'] + list(self.DNS_BLACKLIST)
  1820. for name, need_resolve_hosts in list(self.IPLIST_MAP.items()):
  1821. if all(re.match(r'\d+\.\d+\.\d+\.\d+', x) or ':' in x for x in need_resolve_hosts):
  1822. continue
  1823. need_resolve_remote = [x for x in need_resolve_hosts if ':' not in x and not re.match(r'\d+\.\d+\.\d+\.\d+', x)]
  1824. resolved_iplist = [x for x in need_resolve_hosts if x not in need_resolve_remote]
  1825. result_queue = Queue.Queue()
  1826. for host in need_resolve_remote:
  1827. for dnsserver in self.DNS_SERVERS:
  1828. logging.debug('resolve remote host=%r from dnsserver=%r', host, dnsserver)
  1829. thread.start_new_thread(do_resolve, (host, [dnsserver], result_queue))
  1830. for _ in xrange(len(self.DNS_SERVERS) * len(need_resolve_remote)):
  1831. try:
  1832. host, dnsservers, iplist = result_queue.get(timeout=2)
  1833. resolved_iplist += iplist or []
  1834. logging.debug('resolve remote host=%r from dnsservers=%s return iplist=%s', host, dnsservers, iplist)
  1835. except Queue.Empty:
  1836. logging.warn('resolve remote timeout, try resolve local')
  1837. resolved_iplist += sum([socket.gethostbyname_ex(x)[-1] for x in need_resolve_remote], [])
  1838. break
  1839. if name.startswith('google_') and name not in ('google_cn', 'google_hk'):
  1840. iplist_prefix = re.split(r'[\.:]', resolved_iplist[0])[0]
  1841. resolved_iplist = list(set(x for x in resolved_iplist if x.startswith(iplist_prefix)))
  1842. else:
  1843. resolved_iplist = list(set(resolved_iplist))
  1844. if name.startswith('google_'):
  1845. resolved_iplist = list(set(resolved_iplist) - set(google_blacklist))
  1846. if len(resolved_iplist) == 0:
1847. logging.error('resolving %s hosts returned an empty iplist! please retry!', name)
  1848. sys.exit(-1)
  1849. logging.info('resolve name=%s host to iplist=%r', name, resolved_iplist)
  1850. common.IPLIST_MAP[name] = resolved_iplist
  1851. def info(self):
  1852. info = ''
  1853. info += '------------------------------------------------------\n'
  1854. info += 'GoAgent Version : %s (python/%s %spyopenssl/%s)\n' % (__version__, sys.version[:5], gevent and 'gevent/%s ' % gevent.__version__ or '', getattr(OpenSSL, '__version__', 'Disabled'))
  1855. info += 'Uvent Version : %s (pyuv/%s libuv/%s)\n' % (__import__('uvent').__version__, __import__('pyuv').__version__, __import__('pyuv').LIBUV_VERSION) if all(x in sys.modules for x in ('pyuv', 'uvent')) else ''
  1856. info += 'Listen Address : %s:%d\n' % (self.LISTEN_IP, self.LISTEN_PORT)
  1857. info += 'Local Proxy : %s:%s\n' % (self.PROXY_HOST, self.PROXY_PORT) if self.PROXY_ENABLE else ''
  1858. info += 'Debug INFO : %s\n' % self.LISTEN_DEBUGINFO if self.LISTEN_DEBUGINFO else ''
  1859. info += 'GAE Mode : %s\n' % self.GAE_MODE
  1860. info += 'GAE Profile : %s\n' % self.GAE_PROFILE if self.GAE_PROFILE else ''
  1861. info += 'GAE APPID : %s\n' % '|'.join(self.GAE_APPIDS)
  1862. info += 'GAE Validate : %s\n' % self.GAE_VALIDATE if self.GAE_VALIDATE else ''
  1863. info += 'GAE Obfuscate : %s\n' % self.GAE_OBFUSCATE if self.GAE_OBFUSCATE else ''
  1864. if common.PAC_ENABLE:
  1865. info += 'Pac Server : http://%s:%d/%s\n' % (self.PAC_IP if self.PAC_IP and self.PAC_IP != '0.0.0.0' else ProxyUtil.get_listen_ip(), self.PAC_PORT, self.PAC_FILE)
  1866. info += 'Pac File : file://%s\n' % os.path.join(os.path.dirname(os.path.abspath(__file__)), self.PAC_FILE).replace('\\', '/')
  1867. if common.PHP_ENABLE:
  1868. info += 'PHP Listen : %s\n' % common.PHP_LISTEN
  1869. info += 'PHP FetchServer : %s\n' % common.PHP_FETCHSERVER
  1870. if common.DNS_ENABLE:
  1871. info += 'DNS Listen : %s\n' % common.DNS_LISTEN
  1872. info += 'DNS Servers : %s\n' % '|'.join(common.DNS_SERVERS)
  1873. info += '------------------------------------------------------\n'
  1874. return info
  1875. common = Common()
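# Any proxy.ini option can also be overridden through the environment by using the
# GOAGENT_ prefix, e.g. (illustrative):
#     GOAGENT_LISTEN_PORT=8087 GOAGENT_GAE_APPID=yourappid python proxy.py
# which Common.__init__ translates into CONFIG.set('listen', 'port', ...) and
# CONFIG.set('gae', 'appid', ...) before the values are read.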
  1876. def message_html(title, banner, detail=''):
  1877. MESSAGE_TEMPLATE = '''
  1878. <html><head>
  1879. <meta http-equiv="content-type" content="text/html;charset=utf-8">
  1880. <title>$title</title>
  1881. <style><!--
  1882. body {font-family: arial,sans-serif}
  1883. div.nav {margin-top: 1ex}
  1884. div.nav A {font-size: 10pt; font-family: arial,sans-serif}
  1885. span.nav {font-size: 10pt; font-family: arial,sans-serif; font-weight: bold}
  1886. div.nav A,span.big {font-size: 12pt; color: #0000cc}
  1887. div.nav A {font-size: 10pt; color: black}
  1888. A.l:link {color: #6f6f6f}
  1889. A.u:link {color: green}
  1890. //--></style>
  1891. </head>
  1892. <body text=#000000 bgcolor=#ffffff>
  1893. <table border=0 cellpadding=2 cellspacing=0 width=100%>
  1894. <tr><td bgcolor=#3366cc><font face=arial,sans-serif color=#ffffff><b>Message</b></td></tr>
  1895. <tr><td> </td></tr></table>
  1896. <blockquote>
  1897. <H1>$banner</H1>
  1898. $detail
  1899. <p>
  1900. </blockquote>
  1901. <table width=100% cellpadding=0 cellspacing=0><tr><td bgcolor=#3366cc><img alt="" width=1 height=4></td></tr></table>
  1902. </body></html>
  1903. '''
  1904. return string.Template(MESSAGE_TEMPLATE).substitute(title=title, banner=banner, detail=detail)
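# Example (illustrative): message_html('502 Fetch Failed', 'Local URLFetch Failed',
# 'appid over quota') renders the simple HTML error page returned to the browser.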
  1905. try:
  1906. from Crypto.Cipher.ARC4 import new as RC4Cipher
  1907. except ImportError:
1908. logging.warn('Failed to load Crypto.Cipher.ARC4, using the pure Python implementation instead.')
  1909. class RC4Cipher(object):
  1910. def __init__(self, key):
  1911. x = 0
  1912. box = range(256)
  1913. for i, y in enumerate(box):
  1914. x = (x + y + ord(key[i % len(key)])) & 0xff
  1915. box[i], box[x] = box[x], y
  1916. self.__box = box
  1917. self.__x = 0
  1918. self.__y = 0
  1919. def encrypt(self, data):
  1920. out = []
  1921. out_append = out.append
  1922. x = self.__x
  1923. y = self.__y
  1924. box = self.__box
  1925. for char in data:
  1926. x = (x + 1) & 0xff
  1927. y = (y + box[x]) & 0xff
  1928. box[x], box[y] = box[y], box[x]
  1929. out_append(chr(ord(char) ^ box[(box[x] + box[y]) & 0xff]))
  1930. self.__x = x
  1931. self.__y = y
  1932. return ''.join(out)
  1933. class XORCipher(object):
  1934. """XOR Cipher Class"""
  1935. def __init__(self, key):
  1936. self.__key_gen = itertools.cycle([ord(x) for x in key]).next
  1937. self.__key_xor = lambda s: ''.join(chr(ord(x) ^ self.__key_gen()) for x in s)
  1938. if len(key) == 1:
  1939. try:
  1940. from Crypto.Util.strxor import strxor_c
  1941. c = ord(key)
  1942. self.__key_xor = lambda s: strxor_c(s, c)
  1943. except ImportError:
1944. sys.stderr.write('Failed to load Crypto.Util.strxor, using the pure Python implementation instead.\n')
  1945. def encrypt(self, data):
  1946. return self.__key_xor(data)
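# XOR with a repeating key is symmetric, so encrypt() also decrypts, e.g. (illustrative):
#     assert XORCipher('x').encrypt(XORCipher('x').encrypt('payload')) == 'payload'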
  1947. class CipherFileObject(object):
  1948. """fileobj wrapper for cipher"""
  1949. def __init__(self, fileobj, cipher):
  1950. self.__fileobj = fileobj
  1951. self.__cipher = cipher
  1952. def __getattr__(self, attr):
  1953. if attr not in ('__fileobj', '__cipher'):
  1954. return getattr(self.__fileobj, attr)
  1955. def read(self, size=-1):
  1956. return self.__cipher.encrypt(self.__fileobj.read(size))
  1957. class LocalProxyServer(SocketServer.ThreadingTCPServer):
  1958. """Local Proxy Server"""
  1959. allow_reuse_address = True
  1960. daemon_threads = True
  1961. def close_request(self, request):
  1962. try:
  1963. request.close()
  1964. except StandardError:
  1965. pass
  1966. def finish_request(self, request, client_address):
  1967. try:
  1968. self.RequestHandlerClass(request, client_address, self)
  1969. except NetWorkIOError as e:
  1970. if e[0] not in (errno.ECONNABORTED, errno.ECONNRESET, errno.EPIPE):
  1971. raise
  1972. def handle_error(self, *args):
  1973. """make ThreadingTCPServer happy"""
  1974. exc_info = sys.exc_info()
  1975. error = exc_info and len(exc_info) and exc_info[1]
  1976. if isinstance(error, NetWorkIOError) and len(error.args) > 1 and 'bad write retry' in error.args[1]:
  1977. exc_info = error = None
  1978. else:
  1979. del exc_info, error
  1980. SocketServer.ThreadingTCPServer.handle_error(self, *args)
  1981. class UserAgentFilter(BaseProxyHandlerFilter):
  1982. """user agent filter"""
  1983. def filter(self, handler):
  1984. if common.USERAGENT_ENABLE:
  1985. handler.headers['User-Agent'] = common.USERAGENT_STRING
  1986. class WithGAEFilter(BaseProxyHandlerFilter):
  1987. """with gae filter"""
  1988. def filter(self, handler):
  1989. if handler.host in common.HTTP_WITHGAE:
1990. logging.debug('WithGAEFilter matched %r %r', handler.path, handler.headers)
1991. # assume the last handler filter is GAEFetchFilter
  1992. return handler.handler_filters[-1].filter(handler)
  1993. class ForceHttpsFilter(BaseProxyHandlerFilter):
  1994. """force https filter"""
  1995. def filter(self, handler):
  1996. if handler.command != 'CONNECT' and handler.host in common.HTTP_FORCEHTTPS and not handler.headers.get('Referer', '').startswith('https://') and not handler.path.startswith('https://'):
1997. logging.debug('ForceHttpsFilter matched %r %r', handler.path, handler.headers)
  1998. headers = {'Location': handler.path.replace('http://', 'https://', 1), 'Connection': 'close'}
  1999. return [handler.MOCK, 301, headers, '']
  2000. class FakeHttpsFilter(BaseProxyHandlerFilter):
  2001. """fake https filter"""
  2002. def filter(self, handler):
  2003. if handler.command == 'CONNECT' and handler.host in common.HTTP_FAKEHTTPS:
2004. logging.debug('FakeHttpsFilter matched %r %r', handler.path, handler.headers)
  2005. return [handler.STRIP, True, None]
  2006. class HostsFilter(BaseProxyHandlerFilter):
  2007. """force https filter"""
  2008. def filter_localfile(self, handler, filename):
  2009. content_type = None
  2010. try:
  2011. import mimetypes
  2012. content_type = mimetypes.types_map.get(os.path.splitext(filename)[1])
  2013. except StandardError as e:
  2014. logging.error('import mimetypes failed: %r', e)
  2015. try:
  2016. with open(filename, 'rb') as fp:
  2017. data = fp.read()
  2018. headers = {'Connection': 'close', 'Content-Length': str(len(data))}
  2019. if content_type:
  2020. headers['Content-Type'] = content_type
  2021. return [handler.MOCK, 200, headers, data]
  2022. except StandardError as e:
  2023. return [handler.MOCK, 403, {'Connection': 'close'}, 'read %r %r' % (filename, e)]
  2024. def filter(self, handler):
  2025. host, port = handler.host, handler.port
  2026. hostport = handler.path if handler.command == 'CONNECT' else '%s:%d' % (host, port)
  2027. hostname = ''
  2028. if host in common.HOST_MAP:
  2029. hostname = common.HOST_MAP[host] or host
  2030. elif host.endswith(common.HOST_POSTFIX_ENDSWITH):
  2031. hostname = next(common.HOST_POSTFIX_MAP[x] for x in common.HOST_POSTFIX_MAP if host.endswith(x)) or host
  2032. common.HOST_MAP[host] = hostname
  2033. if hostport in common.HOSTPORT_MAP:
  2034. hostname = common.HOSTPORT_MAP[hostport] or host
  2035. elif hostport.endswith(common.HOSTPORT_POSTFIX_ENDSWITH):
  2036. hostname = next(common.HOSTPORT_POSTFIX_MAP[x] for x in common.HOSTPORT_POSTFIX_MAP if hostport.endswith(x)) or host
  2037. common.HOSTPORT_MAP[hostport] = hostname
  2038. if handler.command != 'CONNECT' and common.URLRE_MAP:
  2039. try:
  2040. hostname = next(common.URLRE_MAP[x] for x in common.URLRE_MAP if x(handler.path)) or host
  2041. except StopIteration:
  2042. pass
  2043. if not hostname:
  2044. return None
  2045. elif hostname in common.IPLIST_MAP:
  2046. handler.dns_cache[host] = common.IPLIST_MAP[hostname]
  2047. elif hostname == host and host.endswith(common.DNS_TCPOVER) and host not in handler.dns_cache:
  2048. try:
  2049. iplist = dnslib_record2iplist(dnslib_resolve_over_tcp(host, handler.dns_servers, timeout=4, blacklist=handler.dns_blacklist))
  2050. logging.info('HostsFilter dnslib_resolve_over_tcp %r with %r return %s', host, handler.dns_servers, iplist)
  2051. handler.dns_cache[host] = iplist
  2052. except socket.error as e:
  2053. logging.warning('HostsFilter dnslib_resolve_over_tcp %r with %r failed: %r', host, handler.dns_servers, e)
  2054. elif re.match(r'^\d+\.\d+\.\d+\.\d+$', hostname) or ':' in hostname:
  2055. handler.dns_cache[host] = [hostname]
  2056. elif hostname.startswith('file://'):
2057. filename = hostname[len('file://'):]  # note: lstrip('file://') would strip characters, not the prefix
  2058. if os.name == 'nt':
  2059. filename = filename.lstrip('/')
  2060. return self.filter_localfile(handler, filename)
  2061. cache_key = '%s:%s' % (hostname, port)
  2062. if handler.command == 'CONNECT':
  2063. return [handler.FORWARD, host, port, handler.connect_timeout, {'cache_key': cache_key}]
  2064. else:
  2065. if host.endswith(common.HTTP_CRLFSITES):
  2066. handler.close_connection = True
  2067. return [handler.DIRECT, {'crlf': True}]
  2068. else:
  2069. return [handler.DIRECT, {'cache_key': cache_key}]
  2070. class DirectRegionFilter(BaseProxyHandlerFilter):
  2071. """direct region filter"""
  2072. geoip = pygeoip.GeoIP(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'GeoIP.dat')) if pygeoip and common.GAE_REGIONS else None
  2073. region_cache = LRUCache(16*1024)
  2074. def get_country_code(self, hostname, dnsservers):
  2075. """http://dev.maxmind.com/geoip/legacy/codes/iso3166/"""
  2076. try:
  2077. return self.region_cache[hostname]
  2078. except KeyError:
  2079. pass
  2080. try:
  2081. if re.match(r'^\d+\.\d+\.\d+\.\d+$', hostname) or ':' in hostname:
  2082. iplist = [hostname]
  2083. elif dnsservers:
  2084. iplist = dnslib_record2iplist(dnslib_resolve_over_udp(hostname, dnsservers, timeout=2))
  2085. else:
  2086. iplist = socket.gethostbyname_ex(hostname)[-1]
  2087. country_code = self.geoip.country_code_by_addr(iplist[0])
  2088. except StandardError as e:
  2089. logging.warning('DirectRegionFilter cannot determine region for hostname=%r %r', hostname, e)
  2090. country_code = ''
  2091. self.region_cache[hostname] = country_code
  2092. return country_code
  2093. def filter(self, handler):
  2094. if self.geoip:
  2095. country_code = self.get_country_code(handler.host, handler.dns_servers)
  2096. if country_code in common.GAE_REGIONS:
  2097. if handler.command == 'CONNECT':
  2098. return [handler.FORWARD, handler.host, handler.port, handler.connect_timeout]
  2099. else:
  2100. return [handler.DIRECT, {}]
  2101. class AutoRangeFilter(BaseProxyHandlerFilter):
  2102. """force https filter"""
  2103. def filter(self, handler):
  2104. path = urlparse.urlsplit(handler.path).path
  2105. need_autorange = any(x(handler.host) for x in common.AUTORANGE_HOSTS_MATCH) or path.endswith(common.AUTORANGE_ENDSWITH)
  2106. if path.endswith(common.AUTORANGE_NOENDSWITH) or 'range=' in urlparse.urlsplit(path).query or handler.command == 'HEAD':
  2107. need_autorange = False
  2108. if handler.command != 'HEAD' and handler.headers.get('Range'):
  2109. m = re.search(r'bytes=(\d+)-', handler.headers['Range'])
  2110. start = int(m.group(1) if m else 0)
  2111. handler.headers['Range'] = 'bytes=%d-%d' % (start, start+common.AUTORANGE_MAXSIZE-1)
  2112. logging.info('autorange range=%r match url=%r', handler.headers['Range'], handler.path)
  2113. elif need_autorange:
  2114. logging.info('Found [autorange]endswith match url=%r', handler.path)
  2115. m = re.search(r'bytes=(\d+)-', handler.headers.get('Range', ''))
  2116. start = int(m.group(1) if m else 0)
  2117. handler.headers['Range'] = 'bytes=%d-%d' % (start, start+common.AUTORANGE_MAXSIZE-1)
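# Example of the rewrite above (illustrative, assuming AUTORANGE_MAXSIZE=1048576):
# an incoming "Range: bytes=0-" is narrowed to "Range: bytes=0-1048575", so the first
# fetch returns a bounded 206 and RangeFetch then streams the remainder.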
  2118. class GAEFetchFilter(BaseProxyHandlerFilter):
  2119. """force https filter"""
  2120. def filter(self, handler):
  2121. """https://developers.google.com/appengine/docs/python/urlfetch/"""
  2122. if handler.command == 'CONNECT':
  2123. do_ssl_handshake = 440 <= handler.port <= 450 or 1024 <= handler.port <= 65535
  2124. return [handler.STRIP, do_ssl_handshake, self if not common.URLRE_MAP else None]
  2125. elif handler.command in ('GET', 'POST', 'HEAD', 'PUT', 'DELETE', 'PATCH'):
  2126. kwargs = {}
  2127. if common.GAE_PASSWORD:
  2128. kwargs['password'] = common.GAE_PASSWORD
  2129. if common.GAE_VALIDATE:
  2130. kwargs['validate'] = 1
  2131. fetchservers = ['%s://%s.appspot.com%s' % (common.GAE_MODE, x, common.GAE_PATH) for x in common.GAE_APPIDS]
  2132. return [handler.URLFETCH, fetchservers, common.FETCHMAX_LOCAL, kwargs]
  2133. else:
  2134. if common.PHP_ENABLE:
  2135. return PHPProxyHandler.handler_filters[-1].filter(handler)
  2136. else:
  2137. logging.warning('"%s %s" not supported by GAE, please enable PHP mode!', handler.command, handler.host)
  2138. return [handler.DIRECT, {}]
  2139. class GAEProxyHandler(AdvancedProxyHandler):
  2140. """GAE Proxy Handler"""
  2141. handler_filters = [UserAgentFilter(), WithGAEFilter(), FakeHttpsFilter(), ForceHttpsFilter(), HostsFilter(), DirectRegionFilter(), AutoRangeFilter(), GAEFetchFilter()]
  2142. def first_run(self):
  2143. """GAEProxyHandler setup, init domain/iplist map"""
  2144. if not common.PROXY_ENABLE:
  2145. logging.info('resolve common.IPLIST_MAP names=%s to iplist', list(common.IPLIST_MAP))
  2146. common.resolve_iplist()
  2147. random.shuffle(common.GAE_APPIDS)
  2148. for appid in common.GAE_APPIDS:
  2149. host = '%s.appspot.com' % appid
  2150. if host not in common.HOST_MAP:
  2151. common.HOST_MAP[host] = common.HOST_POSTFIX_MAP['.appspot.com']
  2152. if host not in self.dns_cache:
  2153. self.dns_cache[host] = common.IPLIST_MAP[common.HOST_MAP[host]]
  2154. def handle_urlfetch_error(self, fetchserver, response):
  2155. gae_appid = urlparse.urlsplit(fetchserver).netloc.split('.')[-3]
  2156. if response.app_status == 503:
2157. # appid over quota, switch to the next appid
  2158. if gae_appid == common.GAE_APPIDS[0] and len(common.GAE_APPIDS) > 1:
  2159. common.GAE_APPIDS.append(common.GAE_APPIDS.pop(0))
2160. logging.info('gae_appid=%r over quota, switching to next appid=%r', gae_appid, common.GAE_APPIDS[0])
  2161. class PHPFetchFilter(BaseProxyHandlerFilter):
  2162. """force https filter"""
  2163. def filter(self, handler):
  2164. if handler.command == 'CONNECT':
  2165. return [handler.STRIP, True, self]
  2166. else:
  2167. kwargs = {}
  2168. if common.PHP_PASSWORD:
  2169. kwargs['password'] = common.PHP_PASSWORD
  2170. if common.PHP_VALIDATE:
  2171. kwargs['validate'] = 1
  2172. return [handler.URLFETCH, [common.PHP_FETCHSERVER], 1, kwargs]
  2173. class PHPProxyHandler(AdvancedProxyHandler):
  2174. """PHP Proxy Handler"""
  2175. first_run_lock = threading.Lock()
  2176. handler_filters = [UserAgentFilter(), FakeHttpsFilter(), ForceHttpsFilter(), PHPFetchFilter()]
  2177. def first_run(self):
  2178. if common.PHP_USEHOSTS:
  2179. self.handler_filters.insert(-1, HostsFilter())
  2180. if not common.PROXY_ENABLE:
  2181. common.resolve_iplist()
  2182. fetchhost = re.sub(r':\d+$', '', urlparse.urlsplit(common.PHP_FETCHSERVER).netloc)
  2183. logging.info('resolve common.PHP_FETCHSERVER domain=%r to iplist', fetchhost)
  2184. if common.PHP_USEHOSTS and fetchhost in common.HOST_MAP:
  2185. hostname = common.HOST_MAP[fetchhost]
  2186. fetchhost_iplist = sum([socket.gethostbyname_ex(x)[-1] for x in common.IPLIST_MAP.get(hostname) or hostname.split('|')], [])
  2187. else:
  2188. fetchhost_iplist = self.gethostbyname2(fetchhost)
  2189. if len(fetchhost_iplist) == 0:
2190. logging.error('resolving %r domain returned an empty iplist! please use an ip list instead of a domain list!', fetchhost)
  2191. sys.exit(-1)
  2192. self.dns_cache[fetchhost] = list(set(fetchhost_iplist))
  2193. logging.info('resolve common.PHP_FETCHSERVER domain to iplist=%r', fetchhost_iplist)
  2194. return True
  2195. class ProxyChainMixin:
  2196. """proxy chain mixin"""
  2197. def gethostbyname2(self, hostname):
  2198. try:
  2199. return socket.gethostbyname_ex(hostname)[-1]
  2200. except socket.error:
  2201. return [hostname]
  2202. def create_tcp_connection(self, hostname, port, timeout, **kwargs):
  2203. sock = socket.create_connection((common.PROXY_HOST, int(common.PROXY_PORT)))
  2204. if hostname.endswith('.appspot.com'):
  2205. hostname = 'www.google.com'
  2206. request_data = 'CONNECT %s:%s HTTP/1.1\r\n' % (hostname, port)
  2207. if common.PROXY_USERNAME and common.PROXY_PASSWROD:
  2208. request_data += 'Proxy-Authorization: Basic %s\r\n' % base64.b64encode(('%s:%s' % (common.PROXY_USERNAME, common.PROXY_PASSWROD)).encode()).decode().strip()
  2209. request_data += '\r\n'
  2210. sock.sendall(request_data)
  2211. response = httplib.HTTPResponse(sock, buffering=False)
  2212. response.begin()
  2213. if response.status >= 400:
  2214. raise httplib.BadStatusLine('%s %s %s' % (response.version, response.status, response.reason))
  2215. return sock
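# The tunnel above is a plain HTTP CONNECT exchange with the upstream proxy, e.g.
# (illustrative):
#     CONNECT www.google.com:443 HTTP/1.1
#     Proxy-Authorization: Basic dXNlcjpwYXNz
# and the socket is handed back as a raw TCP stream once a non-error status line is read.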
  2216. def create_ssl_connection(self, hostname, port, timeout, **kwargs):
  2217. sock = self.create_tcp_connection(hostname, port, timeout, **kwargs)
  2218. ssl_sock = ssl.wrap_socket(sock)
  2219. return ssl_sock
  2220. class GreenForwardMixin:
  2221. """green forward mixin"""
  2222. @staticmethod
  2223. def io_copy(dest, source, timeout, bufsize):
  2224. try:
  2225. dest.settimeout(timeout)
  2226. source.settimeout(timeout)
  2227. while 1:
  2228. data = source.recv(bufsize)
  2229. if not data:
  2230. break
  2231. dest.sendall(data)
  2232. except socket.timeout:
  2233. pass
  2234. except NetWorkIOError as e:
  2235. if e.args[0] not in (errno.ECONNABORTED, errno.ECONNRESET, errno.ENOTCONN, errno.EPIPE):
  2236. raise
  2237. if e.args[0] in (errno.EBADF,):
  2238. return
  2239. finally:
  2240. for sock in (dest, source):
  2241. try:
  2242. sock.close()
  2243. except StandardError:
  2244. pass
  2245. def forward_socket(self, local, remote, timeout):
  2246. """forward socket"""
  2247. bufsize = self.bufsize
  2248. thread.start_new_thread(GreenForwardMixin.io_copy, (remote.dup(), local.dup(), timeout, bufsize))
  2249. GreenForwardMixin.io_copy(local, remote, timeout, bufsize)
  2250. class ProxyChainGAEProxyHandler(ProxyChainMixin, GAEProxyHandler):
  2251. pass
  2252. class ProxyChainPHPProxyHandler(ProxyChainMixin, PHPProxyHandler):
  2253. pass
  2254. class GreenForwardGAEProxyHandler(GreenForwardMixin, GAEProxyHandler):
  2255. pass
  2256. class GreenForwardPHPProxyHandler(GreenForwardMixin, PHPProxyHandler):
  2257. pass
  2258. class ProxyChainGreenForwardGAEProxyHandler(ProxyChainMixin, GreenForwardGAEProxyHandler):
  2259. pass
  2260. class ProxyChainGreenForwardPHPProxyHandler(ProxyChainMixin, GreenForwardPHPProxyHandler):
  2261. pass
  2262. def get_uptime():
  2263. if os.name == 'nt':
  2264. import ctypes
  2265. try:
  2266. tick = ctypes.windll.kernel32.GetTickCount64()
  2267. except AttributeError:
  2268. tick = ctypes.windll.kernel32.GetTickCount()
  2269. return tick / 1000.0
  2270. elif os.path.isfile('/proc/uptime'):
  2271. with open('/proc/uptime', 'rb') as fp:
  2272. uptime = fp.readline().strip().split()[0].strip()
  2273. return float(uptime)
  2274. elif any(os.path.isfile(os.path.join(x, 'uptime')) for x in os.environ['PATH'].split(os.pathsep)):
  2275. # http://www.opensource.apple.com/source/lldb/lldb-69/test/pexpect-2.4/examples/uptime.py
  2276. pattern = r'up\s+(.*?),\s+([0-9]+) users?,\s+load averages?: ([0-9]+\.[0-9][0-9]),?\s+([0-9]+\.[0-9][0-9]),?\s+([0-9]+\.[0-9][0-9])'
  2277. output = os.popen('uptime').read()
  2278. duration, _, _, _, _ = re.search(pattern, output).groups()
  2279. days, hours, mins = 0, 0, 0
  2280. if 'day' in duration:
  2281. m = re.search(r'([0-9]+)\s+day', duration)
  2282. days = int(m.group(1))
  2283. if ':' in duration:
  2284. m = re.search(r'([0-9]+):([0-9]+)', duration)
  2285. hours = int(m.group(1))
  2286. mins = int(m.group(2))
  2287. if 'min' in duration:
  2288. m = re.search(r'([0-9]+)\s+min', duration)
  2289. mins = int(m.group(1))
  2290. return days * 86400 + hours * 3600 + mins * 60
  2291. else:
  2292. #TODO: support other platforms
  2293. return None
  2294. class PacUtil(object):
  2295. """GoAgent Pac Util"""
  2296. @staticmethod
  2297. def update_pacfile(filename):
  2298. listen_ip = '127.0.0.1'
  2299. autoproxy = '%s:%s' % (listen_ip, common.LISTEN_PORT)
  2300. blackhole = '%s:%s' % (listen_ip, common.PAC_PORT)
  2301. default = 'PROXY %s:%s' % (common.PROXY_HOST, common.PROXY_PORT) if common.PROXY_ENABLE else 'DIRECT'
  2302. opener = urllib2.build_opener(urllib2.ProxyHandler({'http': autoproxy, 'https': autoproxy}))
  2303. content = ''
  2304. need_update = True
  2305. with open(filename, 'rb') as fp:
  2306. content = fp.read()
  2307. try:
  2308. placeholder = '// AUTO-GENERATED RULES, DO NOT MODIFY!'
  2309. content = content[:content.index(placeholder)+len(placeholder)]
  2310. content = re.sub(r'''blackhole\s*=\s*['"]PROXY [\.\w:]+['"]''', 'blackhole = \'PROXY %s\'' % blackhole, content)
  2311. content = re.sub(r'''autoproxy\s*=\s*['"]PROXY [\.\w:]+['"]''', 'autoproxy = \'PROXY %s\'' % autoproxy, content)
  2312. content = re.sub(r'''defaultproxy\s*=\s*['"](DIRECT|PROXY [\.\w:]+)['"]''', 'defaultproxy = \'%s\'' % default, content)
  2313. content = re.sub(r'''host\s*==\s*['"][\.\w:]+['"]\s*\|\|\s*isPlainHostName''', 'host == \'%s\' || isPlainHostName' % listen_ip, content)
  2314. if content.startswith('//'):
  2315. line = '// Proxy Auto-Config file generated by autoproxy2pac, %s\r\n' % time.strftime('%Y-%m-%d %H:%M:%S')
  2316. content = line + '\r\n'.join(content.splitlines()[1:])
  2317. except ValueError:
  2318. need_update = False
  2319. try:
  2320. if common.PAC_ADBLOCK:
  2321. admode = common.PAC_ADMODE
  2322. logging.info('try download %r to update_pacfile(%r)', common.PAC_ADBLOCK, filename)
  2323. adblock_content = opener.open(common.PAC_ADBLOCK).read()
  2324. logging.info('%r downloaded, try convert it with adblock2pac', common.PAC_ADBLOCK)
  2325. if 'gevent' in sys.modules and time.sleep is getattr(sys.modules['gevent'], 'sleep', None) and hasattr(gevent.get_hub(), 'threadpool'):
  2326. jsrule = gevent.get_hub().threadpool.apply_e(Exception, PacUtil.adblock2pac, (adblock_content, 'FindProxyForURLByAdblock', blackhole, default, admode))
  2327. else:
  2328. jsrule = PacUtil.adblock2pac(adblock_content, 'FindProxyForURLByAdblock', blackhole, default, admode)
  2329. content += '\r\n' + jsrule + '\r\n'
  2330. logging.info('%r downloaded and parsed', common.PAC_ADBLOCK)
  2331. else:
  2332. content += '\r\nfunction FindProxyForURLByAdblock(url, host) {return "DIRECT";}\r\n'
  2333. except StandardError as e:
  2334. need_update = False
  2335. logging.exception('update_pacfile failed: %r', e)
  2336. try:
  2337. logging.info('try download %r to update_pacfile(%r)', common.PAC_GFWLIST, filename)
  2338. autoproxy_content = base64.b64decode(opener.open(common.PAC_GFWLIST).read())
  2339. logging.info('%r downloaded, try convert it with autoproxy2pac_lite', common.PAC_GFWLIST)
  2340. if 'gevent' in sys.modules and time.sleep is getattr(sys.modules['gevent'], 'sleep', None) and hasattr(gevent.get_hub(), 'threadpool'):
  2341. jsrule = gevent.get_hub().threadpool.apply_e(Exception, PacUtil.autoproxy2pac_lite, (autoproxy_content, 'FindProxyForURLByAutoProxy', autoproxy, default))
  2342. else:
  2343. jsrule = PacUtil.autoproxy2pac_lite(autoproxy_content, 'FindProxyForURLByAutoProxy', autoproxy, default)
  2344. content += '\r\n' + jsrule + '\r\n'
  2345. logging.info('%r downloaded and parsed', common.PAC_GFWLIST)
  2346. except StandardError as e:
  2347. need_update = False
  2348. logging.exception('update_pacfile failed: %r', e)
  2349. if need_update:
  2350. with open(filename, 'wb') as fp:
  2351. fp.write(content)
  2352. logging.info('%r successfully updated', filename)
  2353. @staticmethod
  2354. def autoproxy2pac(content, func_name='FindProxyForURLByAutoProxy', proxy='127.0.0.1:8087', default='DIRECT', indent=4):
  2355. """Autoproxy to Pac, based on https://github.com/iamamac/autoproxy2pac"""
  2356. jsLines = []
  2357. for line in content.splitlines()[1:]:
  2358. if line and not line.startswith("!"):
  2359. use_proxy = True
  2360. if line.startswith("@@"):
  2361. line = line[2:]
  2362. use_proxy = False
  2363. return_proxy = 'PROXY %s' % proxy if use_proxy else default
  2364. if line.startswith('/') and line.endswith('/'):
  2365. jsLine = 'if (/%s/i.test(url)) return "%s";' % (line[1:-1], return_proxy)
  2366. elif line.startswith('||'):
  2367. domain = line[2:].lstrip('.')
  2368. if len(jsLines) > 0 and ('host.indexOf(".%s") >= 0' % domain in jsLines[-1] or 'host.indexOf("%s") >= 0' % domain in jsLines[-1]):
  2369. jsLines.pop()
  2370. jsLine = 'if (dnsDomainIs(host, ".%s") || host == "%s") return "%s";' % (domain, domain, return_proxy)
  2371. elif line.startswith('|'):
  2372. jsLine = 'if (url.indexOf("%s") == 0) return "%s";' % (line[1:], return_proxy)
  2373. elif '*' in line:
  2374. jsLine = 'if (shExpMatch(url, "*%s*")) return "%s";' % (line.strip('*'), return_proxy)
  2375. elif '/' not in line:
  2376. jsLine = 'if (host.indexOf("%s") >= 0) return "%s";' % (line, return_proxy)
  2377. else:
  2378. jsLine = 'if (url.indexOf("%s") >= 0) return "%s";' % (line, return_proxy)
  2379. jsLine = ' ' * indent + jsLine
  2380. if use_proxy:
  2381. jsLines.append(jsLine)
  2382. else:
  2383. jsLines.insert(0, jsLine)
  2384. function = 'function %s(url, host) {\r\n%s\r\n%sreturn "%s";\r\n}' % (func_name, '\n'.join(jsLines), ' '*indent, default)
  2385. return function
  2386. @staticmethod
  2387. def autoproxy2pac_lite(content, func_name='FindProxyForURLByAutoProxy', proxy='127.0.0.1:8087', default='DIRECT', indent=4):
  2388. """Autoproxy to Pac, based on https://github.com/iamamac/autoproxy2pac"""
  2389. direct_domain_set = set([])
  2390. proxy_domain_set = set([])
  2391. for line in content.splitlines()[1:]:
  2392. if line and not line.startswith(('!', '|!', '||!')):
  2393. use_proxy = True
  2394. if line.startswith("@@"):
  2395. line = line[2:]
  2396. use_proxy = False
  2397. domain = ''
  2398. if line.startswith('/') and line.endswith('/'):
  2399. line = line[1:-1]
  2400. if line.startswith('^https?:\\/\\/[^\\/]+') and re.match(r'^(\w|\\\-|\\\.)+$', line[18:]):
  2401. domain = line[18:].replace(r'\.', '.')
  2402. else:
2403. logging.warning('unsupported gfwlist regex: %r', line)
  2404. elif line.startswith('||'):
  2405. domain = line[2:].lstrip('*').rstrip('/')
  2406. elif line.startswith('|'):
  2407. domain = urlparse.urlsplit(line[1:]).netloc.lstrip('*')
  2408. elif line.startswith(('http://', 'https://')):
  2409. domain = urlparse.urlsplit(line).netloc.lstrip('*')
  2410. elif re.search(r'^([\w\-\_\.]+)([\*\/]|$)', line):
  2411. domain = re.split(r'[\*\/]', line)[0]
  2412. else:
  2413. pass
  2414. if '*' in domain:
  2415. domain = domain.split('*')[-1]
  2416. if not domain or re.match(r'^\w+$', domain):
2417. logging.debug('unsupported gfwlist rule: %r', line)
  2418. continue
  2419. if use_proxy:
  2420. proxy_domain_set.add(domain)
  2421. else:
  2422. direct_domain_set.add(domain)
  2423. proxy_domain_list = sorted(set(x.lstrip('.') for x in proxy_domain_set))
  2424. autoproxy_host = ',\r\n'.join('%s"%s": 1' % (' '*indent, x) for x in proxy_domain_list)
  2425. template = '''\
  2426. var autoproxy_host = {
  2427. %(autoproxy_host)s
  2428. };
  2429. function %(func_name)s(url, host) {
  2430. var lastPos;
  2431. do {
  2432. if (autoproxy_host.hasOwnProperty(host)) {
  2433. return 'PROXY %(proxy)s';
  2434. }
  2435. lastPos = host.indexOf('.') + 1;
  2436. host = host.slice(lastPos);
  2437. } while (lastPos >= 1);
  2438. return '%(default)s';
  2439. }'''
  2440. template = re.sub(r'(?m)^\s{%d}' % min(len(re.search(r' +', x).group()) for x in template.splitlines()), '', template)
  2441. template_args = {'autoproxy_host': autoproxy_host,
  2442. 'func_name': func_name,
  2443. 'proxy': proxy,
  2444. 'default': default}
  2445. return template % template_args
  2446. @staticmethod
  2447. def urlfilter2pac(content, func_name='FindProxyForURLByUrlfilter', proxy='127.0.0.1:8086', default='DIRECT', indent=4):
  2448. """urlfilter.ini to Pac, based on https://github.com/iamamac/autoproxy2pac"""
  2449. jsLines = []
  2450. for line in content[content.index('[exclude]'):].splitlines()[1:]:
  2451. if line and not line.startswith(';'):
  2452. use_proxy = True
  2453. if line.startswith("@@"):
  2454. line = line[2:]
  2455. use_proxy = False
  2456. return_proxy = 'PROXY %s' % proxy if use_proxy else default
  2457. if '*' in line:
  2458. jsLine = 'if (shExpMatch(url, "%s")) return "%s";' % (line, return_proxy)
  2459. else:
  2460. jsLine = 'if (url == "%s") return "%s";' % (line, return_proxy)
  2461. jsLine = ' ' * indent + jsLine
  2462. if use_proxy:
  2463. jsLines.append(jsLine)
  2464. else:
  2465. jsLines.insert(0, jsLine)
  2466. function = 'function %s(url, host) {\r\n%s\r\n%sreturn "%s";\r\n}' % (func_name, '\n'.join(jsLines), ' '*indent, default)
  2467. return function
  2468. @staticmethod
  2469. def adblock2pac(content, func_name='FindProxyForURLByAdblock', proxy='127.0.0.1:8086', default='DIRECT', admode=1, indent=4):
  2470. """adblock list to Pac, based on https://github.com/iamamac/autoproxy2pac"""
  2471. white_conditions = {'host': [], 'url.indexOf': [], 'shExpMatch': []}
  2472. black_conditions = {'host': [], 'url.indexOf': [], 'shExpMatch': []}
  2473. for line in content.splitlines()[1:]:
  2474. if not line or line.startswith('!') or '##' in line or '#@#' in line:
  2475. continue
  2476. use_proxy = True
  2477. use_start = False
  2478. use_end = False
  2479. use_domain = False
  2480. use_postfix = []
  2481. if '$' in line:
  2482. posfixs = line.split('$')[-1].split(',')
  2483. if any('domain' in x for x in posfixs):
  2484. continue
  2485. if 'image' in posfixs:
  2486. use_postfix += ['.jpg', '.gif']
  2487. elif 'script' in posfixs:
  2488. use_postfix += ['.js']
  2489. else:
  2490. continue
  2491. line = line.split('$')[0]
  2492. if line.startswith("@@"):
  2493. line = line[2:]
  2494. use_proxy = False
  2495. if '||' == line[:2]:
  2496. line = line[2:]
  2497. if '/' not in line:
  2498. use_domain = True
  2499. else:
  2500. use_start = True
  2501. elif '|' == line[0]:
  2502. line = line[1:]
  2503. use_start = True
  2504. if line[-1] in ('^', '|'):
  2505. line = line[:-1]
  2506. if not use_postfix:
  2507. use_end = True
  2508. line = line.replace('^', '*').strip('*')
  2509. conditions = black_conditions if use_proxy else white_conditions
  2510. if use_start and use_end:
  2511. conditions['shExpMatch'] += ['*%s*' % line]
  2512. elif use_start:
  2513. if '*' in line:
  2514. if use_postfix:
  2515. conditions['shExpMatch'] += ['*%s*%s' % (line, x) for x in use_postfix]
  2516. else:
  2517. conditions['shExpMatch'] += ['*%s*' % line]
  2518. else:
  2519. conditions['url.indexOf'] += [line]
  2520. elif use_domain and use_end:
  2521. if '*' in line:
  2522. conditions['shExpMatch'] += ['%s*' % line]
  2523. else:
  2524. conditions['host'] += [line]
  2525. elif use_domain:
  2526. if line.split('/')[0].count('.') <= 1:
  2527. if use_postfix:
  2528. conditions['shExpMatch'] += ['*.%s*%s' % (line, x) for x in use_postfix]
  2529. else:
  2530. conditions['shExpMatch'] += ['*.%s*' % line]
  2531. else:
  2532. if '*' in line:
  2533. if use_postfix:
  2534. conditions['shExpMatch'] += ['*%s*%s' % (line, x) for x in use_postfix]
  2535. else:
  2536. conditions['shExpMatch'] += ['*%s*' % line]
  2537. else:
  2538. if use_postfix:
  2539. conditions['shExpMatch'] += ['*%s*%s' % (line, x) for x in use_postfix]
  2540. else:
  2541. conditions['url.indexOf'] += ['http://%s' % line]
  2542. else:
  2543. if use_postfix:
  2544. conditions['shExpMatch'] += ['*%s*%s' % (line, x) for x in use_postfix]
  2545. else:
  2546. conditions['shExpMatch'] += ['*%s*' % line]
  2547. templates = ['''\
  2548. function %(func_name)s(url, host) {
  2549. return '%(default)s';
  2550. }''',
  2551. '''\
  2552. var blackhole_host = {
  2553. %(blackhole_host)s
  2554. };
  2555. function %(func_name)s(url, host) {
2556. // untrusted Adblock Plus list, disable whitelist until chinalist comes back.
  2557. if (blackhole_host.hasOwnProperty(host)) {
  2558. return 'PROXY %(proxy)s';
  2559. }
  2560. return '%(default)s';
  2561. }''',
  2562. '''\
  2563. var blackhole_host = {
  2564. %(blackhole_host)s
  2565. };
  2566. var blackhole_url_indexOf = [
  2567. %(blackhole_url_indexOf)s
  2568. ];
2569. function %(func_name)s(url, host) {
2570. // untrusted Adblock Plus list, disable whitelist until chinalist comes back.
  2571. if (blackhole_host.hasOwnProperty(host)) {
  2572. return 'PROXY %(proxy)s';
  2573. }
  2574. for (i = 0; i < blackhole_url_indexOf.length; i++) {
  2575. if (url.indexOf(blackhole_url_indexOf[i]) >= 0) {
  2576. return 'PROXY %(proxy)s';
  2577. }
  2578. }
  2579. return '%(default)s';
  2580. }''',
  2581. '''\
  2582. var blackhole_host = {
  2583. %(blackhole_host)s
  2584. };
  2585. var blackhole_url_indexOf = [
  2586. %(blackhole_url_indexOf)s
  2587. ];
  2588. var blackhole_shExpMatch = [
  2589. %(blackhole_shExpMatch)s
  2590. ];
  2591. function %(func_name)s(url, host) {
2592. // untrusted Adblock Plus list, disable whitelist until chinalist comes back.
  2593. if (blackhole_host.hasOwnProperty(host)) {
  2594. return 'PROXY %(proxy)s';
  2595. }
  2596. for (i = 0; i < blackhole_url_indexOf.length; i++) {
  2597. if (url.indexOf(blackhole_url_indexOf[i]) >= 0) {
  2598. return 'PROXY %(proxy)s';
  2599. }
  2600. }
  2601. for (i = 0; i < blackhole_shExpMatch.length; i++) {
  2602. if (shExpMatch(url, blackhole_shExpMatch[i])) {
  2603. return 'PROXY %(proxy)s';
  2604. }
  2605. }
  2606. return '%(default)s';
  2607. }''']
  2608. template = re.sub(r'(?m)^\s{%d}' % min(len(re.search(r' +', x).group()) for x in templates[admode].splitlines()), '', templates[admode])
  2609. template_kwargs = {'blackhole_host': ',\r\n'.join("%s'%s': 1" % (' '*indent, x) for x in sorted(black_conditions['host'])),
  2610. 'blackhole_url_indexOf': ',\r\n'.join("%s'%s'" % (' '*indent, x) for x in sorted(black_conditions['url.indexOf'])),
  2611. 'blackhole_shExpMatch': ',\r\n'.join("%s'%s'" % (' '*indent, x) for x in sorted(black_conditions['shExpMatch'])),
  2612. 'func_name': func_name,
  2613. 'proxy': proxy,
  2614. 'default': default}
  2615. return template % template_kwargs
  2616. class PacFileFilter(BaseProxyHandlerFilter):
  2617. """pac file filter"""
  2618. def filter(self, handler):
  2619. is_local_client = handler.client_address[0] in ('127.0.0.1', '::1')
  2620. pacfile = os.path.join(os.path.dirname(os.path.abspath(__file__)), common.PAC_FILE)
  2621. urlparts = urlparse.urlsplit(handler.path)
  2622. if handler.command == 'GET' and urlparts.path.lstrip('/') == common.PAC_FILE:
  2623. if urlparts.query == 'flush':
  2624. if is_local_client:
  2625. thread.start_new_thread(PacUtil.update_pacfile, (pacfile,))
  2626. else:
  2627. return [handler.MOCK, 403, {'Content-Type': 'text/plain'}, 'client address %r not allowed' % handler.client_address[0]]
  2628. if time.time() - os.path.getmtime(pacfile) > common.PAC_EXPIRED:
  2629. # check system uptime > 30 minutes
  2630. uptime = get_uptime()
  2631. if uptime and uptime > 1800:
  2632. thread.start_new_thread(lambda: os.utime(pacfile, (time.time(), time.time())) or PacUtil.update_pacfile(pacfile), tuple())
  2633. with open(pacfile, 'rb') as fp:
  2634. content = fp.read()
  2635. if not is_local_client:
  2636. listen_ip = ProxyUtil.get_listen_ip()
  2637. content = content.replace('127.0.0.1', listen_ip)
  2638. headers = {'Content-Type': 'text/plain'}
  2639. if 'gzip' in handler.headers.get('Accept-Encoding', ''):
  2640. headers['Content-Encoding'] = 'gzip'
  2641. compressobj = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -zlib.MAX_WBITS, zlib.DEF_MEM_LEVEL, 0)
  2642. dataio = io.BytesIO()
  2643. dataio.write('\x1f\x8b\x08\x00\x00\x00\x00\x00\x02\xff')
  2644. dataio.write(compressobj.compress(content))
  2645. dataio.write(compressobj.flush())
  2646. dataio.write(struct.pack('<LL', zlib.crc32(content) & 0xFFFFFFFFL, len(content) & 0xFFFFFFFFL))
  2647. content = dataio.getvalue()
  2648. return [handler.MOCK, 200, headers, content]
  2649. class StaticFileFilter(BaseProxyHandlerFilter):
  2650. """static file filter"""
  2651. def filter(self, handler):
  2652. path = urlparse.urlsplit(handler.path).path
  2653. if handler.command == 'GET' and path.startswith('/'):
  2654. filename = '.' + path
  2655. if os.path.isfile(filename):
  2656. with open(filename, 'rb') as fp:
  2657. content = fp.read()
  2658. headers = {'Content-Type': 'application/octet-stream', 'Connection': 'close'}
  2659. return [handler.MOCK, 200, headers, content]
  2660. class BlackholeFilter(BaseProxyHandlerFilter):
  2661. """blackhole filter"""
  2662. one_pixel_gif = 'GIF89a\x01\x00\x01\x00\x80\xff\x00\xc0\xc0\xc0\x00\x00\x00!\xf9\x04\x01\x00\x00\x00\x00,\x00\x00\x00\x00\x01\x00\x01\x00\x00\x02\x02D\x01\x00;'
  2663. def filter(self, handler):
  2664. if handler.command == 'CONNECT':
  2665. return [handler.STRIP, True, self]
  2666. elif handler.path.startswith(('http://', 'https://')):
  2667. headers = {'Cache-Control': 'max-age=86400',
2668. 'Expires': 'Sun, 01 Aug 2100 00:00:00 GMT',
  2669. 'Connection': 'close'}
  2670. content = ''
  2671. if urlparse.urlsplit(handler.path).path.lower().endswith(('.jpg', '.gif', '.png','.jpeg', '.bmp')):
  2672. headers['Content-Type'] = 'image/gif'
  2673. content = self.one_pixel_gif
  2674. return [handler.MOCK, 200, headers, content]
  2675. else:
  2676. return [handler.MOCK, 404, {'Connection': 'close'}, '']
  2677. class PACProxyHandler(SimpleProxyHandler):
  2678. """pac proxy handler"""
  2679. handler_filters = [PacFileFilter(), StaticFileFilter(), BlackholeFilter()]
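# get_process_list() enumerates running processes as (pid, name, exe) tuples:
# on Windows via psapi.EnumProcesses plus QueryFullProcessImageNameA (falling
# back to GetModuleFileNameExA when it is unavailable), on Linux by resolving
# /proc/<pid>/exe symlinks, and elsewhere via psutil when it is installed.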
  2680. def get_process_list():
  2681. import os
  2682. import glob
  2683. import ctypes
  2684. import collections
  2685. Process = collections.namedtuple('Process', 'pid name exe')
  2686. process_list = []
  2687. if os.name == 'nt':
  2688. PROCESS_QUERY_INFORMATION = 0x0400
  2689. PROCESS_VM_READ = 0x0010
  2690. lpidProcess = (ctypes.c_ulong * 1024)()
  2691. cb = ctypes.sizeof(lpidProcess)
  2692. cbNeeded = ctypes.c_ulong()
  2693. ctypes.windll.psapi.EnumProcesses(ctypes.byref(lpidProcess), cb, ctypes.byref(cbNeeded))
  2694. nReturned = cbNeeded.value/ctypes.sizeof(ctypes.c_ulong())
  2695. pidProcess = [i for i in lpidProcess][:nReturned]
  2696. has_queryimage = hasattr(ctypes.windll.kernel32, 'QueryFullProcessImageNameA')
  2697. for pid in pidProcess:
  2698. hProcess = ctypes.windll.kernel32.OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, 0, pid)
  2699. if hProcess:
  2700. modname = ctypes.create_string_buffer(2048)
  2701. count = ctypes.c_ulong(ctypes.sizeof(modname))
  2702. if has_queryimage:
  2703. ctypes.windll.kernel32.QueryFullProcessImageNameA(hProcess, 0, ctypes.byref(modname), ctypes.byref(count))
  2704. else:
  2705. ctypes.windll.psapi.GetModuleFileNameExA(hProcess, 0, ctypes.byref(modname), ctypes.byref(count))
  2706. exe = modname.value
  2707. name = os.path.basename(exe)
  2708. process_list.append(Process(pid=pid, name=name, exe=exe))
  2709. ctypes.windll.kernel32.CloseHandle(hProcess)
  2710. elif sys.platform.startswith('linux'):
  2711. for filename in glob.glob('/proc/[0-9]*/cmdline'):
  2712. pid = int(filename.split('/')[2])
  2713. exe_link = '/proc/%d/exe' % pid
  2714. if os.path.exists(exe_link):
  2715. exe = os.readlink(exe_link)
  2716. name = os.path.basename(exe)
  2717. process_list.append(Process(pid=pid, name=name, exe=exe))
  2718. else:
  2719. try:
  2720. import psutil
  2721. process_list = psutil.get_process_list()
  2722. except StandardError as e:
  2723. logging.exception('psutil.get_process_list() failed: %r', e)
  2724. return process_list
  2725. def pre_start():
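# One-time sanity checks and tuning before the servers start: raise
# RLIMIT_NOFILE on POSIX, set or hide the console window on Windows, check a
# blacklist of known-conflicting security software, warn on slow CPUs,
# validate the [gae] appid/password/regions settings, seed the DNS server
# lists, apply the auto-range sizes to RangeFetch and install the optional
# basic-auth filter.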
  2726. if sys.platform == 'cygwin':
  2727. logging.info('cygwin is not officially supported, please continue at your own risk :)')
  2728. #sys.exit(-1)
  2729. elif os.name == 'posix':
  2730. try:
  2731. import resource
  2732. resource.setrlimit(resource.RLIMIT_NOFILE, (8192, -1))
  2733. except ValueError:
  2734. pass
  2735. elif os.name == 'nt':
  2736. import ctypes
  2737. ctypes.windll.kernel32.SetConsoleTitleW(u'GoAgent v%s' % __version__)
  2738. if not common.LISTEN_VISIBLE:
  2739. ctypes.windll.user32.ShowWindow(ctypes.windll.kernel32.GetConsoleWindow(), 0)
  2740. else:
  2741. ctypes.windll.user32.ShowWindow(ctypes.windll.kernel32.GetConsoleWindow(), 1)
  2742. if common.LOVE_ENABLE and random.randint(1, 100) <= 5:
  2743. title = ctypes.create_unicode_buffer(1024)
  2744. ctypes.windll.kernel32.GetConsoleTitleW(ctypes.byref(title), len(title)-1)
  2745. ctypes.windll.kernel32.SetConsoleTitleW('%s %s' % (title.value, random.choice(common.LOVE_TIP)))
  2746. blacklist = {'360safe': False,
  2747. 'QQProtect': False, }
  2748. softwares = [k for k, v in blacklist.items() if v]
  2749. if softwares:
  2750. tasklist = '\n'.join(x.name for x in get_process_list()).lower()
  2751. softwares = [x for x in softwares if x.lower() in tasklist]
  2752. if softwares:
2753. title = u'GoAgent Notice'
2754. error = u'Some security software (such as %s) may conflict with GoAgent and cause excessive CPU usage.\nIf this happens, please exit that security software temporarily to keep running GoAgent.' % ','.join(softwares)
  2755. ctypes.windll.user32.MessageBoxW(None, error, title, 0)
  2756. #sys.exit(0)
  2757. if os.path.isfile('/proc/cpuinfo'):
  2758. with open('/proc/cpuinfo', 'rb') as fp:
  2759. m = re.search(r'(?im)(BogoMIPS|cpu MHz)\s+:\s+([\d\.]+)', fp.read())
  2760. if m and float(m.group(2)) < 1000:
  2761. logging.warning("*NOTE*, Please set [gae]window=2")
  2762. if GAEProxyHandler.max_window != common.GAE_WINDOW:
  2763. GAEProxyHandler.max_window = common.GAE_WINDOW
  2764. if common.GAE_KEEPALIVE and common.GAE_MODE == 'https':
  2765. GAEProxyHandler.ssl_connection_keepalive = True
  2766. if common.GAE_APPIDS[0] == 'goagent':
2767. logging.critical('please edit %s to add your appid to [gae]!', common.CONFIG_FILENAME)
  2768. sys.exit(-1)
  2769. if common.GAE_MODE == 'http' and common.GAE_PASSWORD == '':
2770. logging.critical('to enable http mode, you should set [gae]password = <your_password> and [gae]options = rc4 in %r', common.CONFIG_FILENAME)
  2771. sys.exit(-1)
  2772. if common.GAE_TRANSPORT:
  2773. GAEProxyHandler.disable_transport_ssl = False
  2774. if common.GAE_REGIONS and not pygeoip:
  2775. logging.critical('to enable [gae]regions mode, you should install pygeoip')
  2776. sys.exit(-1)
  2777. if common.PAC_ENABLE:
  2778. pac_ip = ProxyUtil.get_listen_ip() if common.PAC_IP in ('', '::', '0.0.0.0') else common.PAC_IP
  2779. url = 'http://%s:%d/%s' % (pac_ip, common.PAC_PORT, common.PAC_FILE)
  2780. spawn_later(600, urllib2.build_opener(urllib2.ProxyHandler({})).open, url)
  2781. if not dnslib:
2782. logging.error('dnslib not found, please put dnslib-0.8.3.egg into %r!', os.path.dirname(os.path.abspath(__file__)))
  2783. sys.exit(-1)
  2784. if not common.DNS_ENABLE:
  2785. if not common.HTTP_DNS:
  2786. common.HTTP_DNS = common.DNS_SERVERS[:]
  2787. for dnsservers_ref in (common.HTTP_DNS, common.DNS_SERVERS):
  2788. any(dnsservers_ref.insert(0, x) for x in [y for y in get_dnsserver_list() if y not in dnsservers_ref])
  2789. AdvancedProxyHandler.dns_servers = common.HTTP_DNS
  2790. AdvancedProxyHandler.dns_blacklist = common.DNS_BLACKLIST
  2791. else:
  2792. AdvancedProxyHandler.dns_servers = common.HTTP_DNS or common.DNS_SERVERS
  2793. AdvancedProxyHandler.dns_blacklist = common.DNS_BLACKLIST
  2794. if not OpenSSL:
  2795. logging.warning('python-openssl not found, please install it!')
  2796. RangeFetch.threads = common.AUTORANGE_THREADS
  2797. RangeFetch.maxsize = common.AUTORANGE_MAXSIZE
  2798. RangeFetch.bufsize = common.AUTORANGE_BUFSIZE
  2799. RangeFetch.waitsize = common.AUTORANGE_WAITSIZE
  2800. if common.LISTEN_USERNAME and common.LISTEN_PASSWORD:
  2801. GAEProxyHandler.handler_filters.insert(0, AuthFilter(common.LISTEN_USERNAME, common.LISTEN_PASSWORD))
  2802. def main():
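# Resolves symlinks, chdir()s next to this script, configures logging, runs
# pre_start() and the CA check, starts the optional PHP, PAC and DNS servers
# in background threads, then serves the GAE proxy in the foreground.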
  2803. global __file__
  2804. __file__ = os.path.abspath(__file__)
  2805. if os.path.islink(__file__):
  2806. __file__ = getattr(os, 'readlink', lambda x: x)(__file__)
  2807. os.chdir(os.path.dirname(os.path.abspath(__file__)))
  2808. logging.basicConfig(level=logging.DEBUG if common.LISTEN_DEBUGINFO else logging.INFO, format='%(levelname)s - %(asctime)s %(message)s', datefmt='[%b %d %H:%M:%S]')
  2809. pre_start()
  2810. CertUtil.check_ca()
  2811. sys.stdout.write(common.info())
  2812. uvent_enabled = 'uvent.loop' in sys.modules and isinstance(gevent.get_hub().loop, __import__('uvent').loop.UVLoop)
  2813. if common.PHP_ENABLE:
  2814. host, port = common.PHP_LISTEN.split(':')
  2815. HandlerClass = ((PHPProxyHandler, GreenForwardPHPProxyHandler) if not common.PROXY_ENABLE else (ProxyChainPHPProxyHandler, ProxyChainGreenForwardPHPProxyHandler))[uvent_enabled]
  2816. server = LocalProxyServer((host, int(port)), HandlerClass)
  2817. thread.start_new_thread(server.serve_forever, tuple())
  2818. if common.PAC_ENABLE:
  2819. server = LocalProxyServer((common.PAC_IP, common.PAC_PORT), PACProxyHandler)
  2820. thread.start_new_thread(server.serve_forever, tuple())
  2821. if common.DNS_ENABLE:
  2822. try:
  2823. sys.path += ['.']
  2824. from dnsproxy import DNSServer
  2825. host, port = common.DNS_LISTEN.split(':')
  2826. server = DNSServer((host, int(port)), dns_servers=common.DNS_SERVERS, dns_blacklist=common.DNS_BLACKLIST, dns_tcpover=common.DNS_TCPOVER)
  2827. thread.start_new_thread(server.serve_forever, tuple())
  2828. except ImportError:
  2829. logging.exception('GoAgent DNSServer requires dnslib and gevent 1.0')
  2830. sys.exit(-1)
  2831. HandlerClass = ((GAEProxyHandler, GreenForwardGAEProxyHandler) if not common.PROXY_ENABLE else (ProxyChainGAEProxyHandler, ProxyChainGreenForwardGAEProxyHandler))[uvent_enabled]
  2832. server = LocalProxyServer((common.LISTEN_IP, common.LISTEN_PORT), HandlerClass)
  2833. try:
  2834. server.serve_forever()
  2835. except SystemError as e:
  2836. if '(libev) select: ' in repr(e):
  2837. logging.error('PLEASE START GOAGENT BY uvent.bat')
  2838. sys.exit(-1)
  2839. if __name__ == '__main__':
  2840. main()