
/local/proxy.py

https://github.com/panzt/goagent
Python | 3159 lines | 3041 code | 60 blank | 58 comment
#!/usr/bin/env python
# coding:utf-8
# Based on GAppProxy 2.0.0 by Du XiaoGang <dugang.2008@gmail.com>
# Based on WallProxy 0.4.0 by Hust Moon <www.ehust@gmail.com>
# Contributor:
# Phus Lu <phus.lu@gmail.com>
# Hewig Xu <hewigovens@gmail.com>
# Ayanamist Yang <ayanamist@gmail.com>
# V.E.O <V.E.O@tom.com>
# Max Lv <max.c.lv@gmail.com>
# AlsoTang <alsotang@gmail.com>
# Christopher Meng <i@cicku.me>
# Yonsm Guo <YonsmGuo@gmail.com>
# Parkman <cseparkman@gmail.com>
# Ming Bai <mbbill@gmail.com>
# Bin Yu <yubinlove1991@gmail.com>
# lileixuan <lileixuan@gmail.com>
# Cong Ding <cong@cding.org>
# Zhang Youfu <zhangyoufu@gmail.com>
# Lu Wei <luwei@barfoo>
# Harmony Meow <harmony.meow@gmail.com>
# logostream <logostream@gmail.com>
# Rui Wang <isnowfy@gmail.com>
# Wang Wei Qiang <wwqgtxx@gmail.com>
# Felix Yan <felixonmars@gmail.com>
# Sui Feng <suifeng.me@qq.com>
# QXO <qxodream@gmail.com>
# Geek An <geekan@foxmail.com>
# Poly Rabbit <mcx_221@foxmail.com>
# oxnz <yunxinyi@gmail.com>
# Shusen Liu <liushusen.smart@gmail.com>
# Yad Smood <y.s.inside@gmail.com>
# Chen Shuang <cs0x7f@gmail.com>
# cnfuyu <cnfuyu@gmail.com>
# cuixin <steven.cuixin@gmail.com>
# s2marine0 <s2marine0@gmail.com>
# Toshio Xiang <snachx@gmail.com>
# Bo Tian <dxmtb@163.com>
# Virgil <variousvirgil@gmail.com>
# hub01 <miaojiabumiao@yeah.net>
# v3aqb <sgzz.cj@gmail.com>
# Oling Cat <olingcat@gmail.com>
__version__ = '3.1.16'
import sys
import os
import glob
reload(sys).setdefaultencoding('UTF-8')
sys.dont_write_bytecode = True
sys.path += glob.glob('%s/*.egg' % os.path.dirname(os.path.abspath(__file__)))
try:
    import gevent
    import gevent.socket
    import gevent.server
    import gevent.queue
    import gevent.monkey
    gevent.monkey.patch_all(subprocess=True)
except ImportError:
    gevent = None
except TypeError:
    gevent.monkey.patch_all()
    sys.stderr.write('\033[31m Warning: Please update gevent to the latest 1.0 version!\033[0m\n')
import errno
import time
import struct
import collections
import binascii
import zlib
import itertools
import re
import io
import fnmatch
import traceback
import random
import base64
import string
import hashlib
import threading
import thread
import socket
import ssl
import select
import Queue
import SocketServer
import ConfigParser
import BaseHTTPServer
import httplib
import urllib
import urllib2
import urlparse
try:
    import dnslib
except ImportError:
    dnslib = None
try:
    import OpenSSL
except ImportError:
    OpenSSL = None
try:
    import pygeoip
except ImportError:
    pygeoip = None
HAS_PYPY = hasattr(sys, 'pypy_version_info')
NetWorkIOError = (socket.error, ssl.SSLError, OSError) if not OpenSSL else (socket.error, ssl.SSLError, OpenSSL.SSL.Error, OSError)
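# Logging subclasses the module type so that a single instance can be installed into
# sys.modules as a drop-in 'logging' module with colored stderr output (see the
# assignment right after the class body).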
class Logging(type(sys)):
    CRITICAL = 50
    FATAL = CRITICAL
    ERROR = 40
    WARNING = 30
    WARN = WARNING
    INFO = 20
    DEBUG = 10
    NOTSET = 0

    def __init__(self, *args, **kwargs):
        self.level = self.__class__.INFO
        self.__set_error_color = lambda: None
        self.__set_warning_color = lambda: None
        self.__set_debug_color = lambda: None
        self.__reset_color = lambda: None
        if hasattr(sys.stderr, 'isatty') and sys.stderr.isatty():
            if os.name == 'nt':
                import ctypes
                SetConsoleTextAttribute = ctypes.windll.kernel32.SetConsoleTextAttribute
                GetStdHandle = ctypes.windll.kernel32.GetStdHandle
                self.__set_error_color = lambda: SetConsoleTextAttribute(GetStdHandle(-11), 0x04)
                self.__set_warning_color = lambda: SetConsoleTextAttribute(GetStdHandle(-11), 0x06)
                self.__set_debug_color = lambda: SetConsoleTextAttribute(GetStdHandle(-11), 0x002)
                self.__reset_color = lambda: SetConsoleTextAttribute(GetStdHandle(-11), 0x07)
            elif os.name == 'posix':
                self.__set_error_color = lambda: sys.stderr.write('\033[31m')
                self.__set_warning_color = lambda: sys.stderr.write('\033[33m')
                self.__set_debug_color = lambda: sys.stderr.write('\033[32m')
                self.__reset_color = lambda: sys.stderr.write('\033[0m')

    @classmethod
    def getLogger(cls, *args, **kwargs):
        return cls(*args, **kwargs)

    def basicConfig(self, *args, **kwargs):
        self.level = int(kwargs.get('level', self.__class__.INFO))
        if self.level > self.__class__.DEBUG:
            self.debug = self.dummy

    def log(self, level, fmt, *args, **kwargs):
        sys.stderr.write('%s - [%s] %s\n' % (level, time.ctime()[4:-5], fmt % args))

    def dummy(self, *args, **kwargs):
        pass

    def debug(self, fmt, *args, **kwargs):
        self.__set_debug_color()
        self.log('DEBUG', fmt, *args, **kwargs)
        self.__reset_color()

    def info(self, fmt, *args, **kwargs):
        self.log('INFO', fmt, *args)

    def warning(self, fmt, *args, **kwargs):
        self.__set_warning_color()
        self.log('WARNING', fmt, *args, **kwargs)
        self.__reset_color()

    def warn(self, fmt, *args, **kwargs):
        self.warning(fmt, *args, **kwargs)

    def error(self, fmt, *args, **kwargs):
        self.__set_error_color()
        self.log('ERROR', fmt, *args, **kwargs)
        self.__reset_color()

    def exception(self, fmt, *args, **kwargs):
        self.error(fmt, *args, **kwargs)
        sys.stderr.write(traceback.format_exc() + '\n')

    def critical(self, fmt, *args, **kwargs):
        self.__set_error_color()
        self.log('CRITICAL', fmt, *args, **kwargs)
        self.__reset_color()
logging = sys.modules['logging'] = Logging('logging')
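# LRUCache: small dict-backed cache that evicts the least recently used half once
# max_items is exceeded; used below as AdvancedProxyHandler.dns_cache.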
class LRUCache(object):
    """http://pypi.python.org/pypi/lru/"""

    def __init__(self, max_items=100):
        self.cache = {}
        self.key_order = []
        self.max_items = max_items

    def __setitem__(self, key, value):
        self.cache[key] = value
        self._mark(key)

    def __getitem__(self, key):
        value = self.cache[key]
        self._mark(key)
        return value

    def __contains__(self, key):
        return key in self.cache

    def _mark(self, key):
        if key in self.key_order:
            self.key_order.remove(key)
        self.key_order.insert(0, key)
        if len(self.key_order) > self.max_items:
            index = self.max_items // 2
            delitem = self.cache.__delitem__
            key_order = self.key_order
            any(delitem(key_order[x]) for x in xrange(index, len(key_order)))
            self.key_order = self.key_order[:index]

    def clear(self):
        self.cache = {}
        self.key_order = []
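# CertUtil issues a local 'GoAgent CA' root certificate plus per-host leaf certificates
# under certs/ (mitmproxy-style), so the proxy can terminate and re-encrypt intercepted
# HTTPS connections.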
class CertUtil(object):
    """CertUtil module, based on mitmproxy"""

    ca_vendor = 'GoAgent'
    ca_keyfile = 'CA.crt'
    ca_certdir = 'certs'
    ca_lock = threading.Lock()

    @staticmethod
    def create_ca():
        key = OpenSSL.crypto.PKey()
        key.generate_key(OpenSSL.crypto.TYPE_RSA, 2048)
        ca = OpenSSL.crypto.X509()
        ca.set_serial_number(0)
        ca.set_version(2)
        subj = ca.get_subject()
        subj.countryName = 'CN'
        subj.stateOrProvinceName = 'Internet'
        subj.localityName = 'Cernet'
        subj.organizationName = CertUtil.ca_vendor
        subj.organizationalUnitName = '%s Root' % CertUtil.ca_vendor
        subj.commonName = '%s CA' % CertUtil.ca_vendor
        ca.gmtime_adj_notBefore(0)
        ca.gmtime_adj_notAfter(24 * 60 * 60 * 3652)
        ca.set_issuer(ca.get_subject())
        ca.set_pubkey(key)
        ca.add_extensions([
            OpenSSL.crypto.X509Extension(b'basicConstraints', True, b'CA:TRUE'),
            OpenSSL.crypto.X509Extension(b'nsCertType', True, b'sslCA'),
            OpenSSL.crypto.X509Extension(b'extendedKeyUsage', True, b'serverAuth,clientAuth,emailProtection,timeStamping,msCodeInd,msCodeCom,msCTLSign,msSGC,msEFS,nsSGC'),
            OpenSSL.crypto.X509Extension(b'keyUsage', False, b'keyCertSign, cRLSign'),
            OpenSSL.crypto.X509Extension(b'subjectKeyIdentifier', False, b'hash', subject=ca), ])
        ca.sign(key, 'sha1')
        return key, ca

    @staticmethod
    def dump_ca():
        key, ca = CertUtil.create_ca()
        with open(CertUtil.ca_keyfile, 'wb') as fp:
            fp.write(OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, ca))
            fp.write(OpenSSL.crypto.dump_privatekey(OpenSSL.crypto.FILETYPE_PEM, key))

    @staticmethod
    def _get_cert(commonname, sans=()):
        with open(CertUtil.ca_keyfile, 'rb') as fp:
            content = fp.read()
            key = OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, content)
            ca = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, content)
        pkey = OpenSSL.crypto.PKey()
        pkey.generate_key(OpenSSL.crypto.TYPE_RSA, 2048)
        req = OpenSSL.crypto.X509Req()
        subj = req.get_subject()
        subj.countryName = 'CN'
        subj.stateOrProvinceName = 'Internet'
        subj.localityName = 'Cernet'
        subj.organizationalUnitName = '%s Branch' % CertUtil.ca_vendor
        if commonname[0] == '.':
            subj.commonName = '*' + commonname
            subj.organizationName = '*' + commonname
            sans = ['*'+commonname] + [x for x in sans if x != '*'+commonname]
        else:
            subj.commonName = commonname
            subj.organizationName = commonname
            sans = [commonname] + [x for x in sans if x != commonname]
        #req.add_extensions([OpenSSL.crypto.X509Extension(b'subjectAltName', True, ', '.join('DNS: %s' % x for x in sans)).encode()])
        req.set_pubkey(pkey)
        req.sign(pkey, 'sha1')
        cert = OpenSSL.crypto.X509()
        cert.set_version(2)
        try:
            cert.set_serial_number(int(hashlib.md5(commonname.encode('utf-8')).hexdigest(), 16))
        except OpenSSL.SSL.Error:
            cert.set_serial_number(int(time.time()*1000))
        cert.gmtime_adj_notBefore(0)
        cert.gmtime_adj_notAfter(60 * 60 * 24 * 3652)
        cert.set_issuer(ca.get_subject())
        cert.set_subject(req.get_subject())
        cert.set_pubkey(req.get_pubkey())
        if commonname[0] == '.':
            sans = ['*'+commonname] + [s for s in sans if s != '*'+commonname]
        else:
            sans = [commonname] + [s for s in sans if s != commonname]
        #cert.add_extensions([OpenSSL.crypto.X509Extension(b'subjectAltName', True, ', '.join('DNS: %s' % x for x in sans))])
        cert.sign(key, 'sha1')
        certfile = os.path.join(CertUtil.ca_certdir, commonname + '.crt')
        with open(certfile, 'wb') as fp:
            fp.write(OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, cert))
            fp.write(OpenSSL.crypto.dump_privatekey(OpenSSL.crypto.FILETYPE_PEM, pkey))
        return certfile

    @staticmethod
    def get_cert(commonname, sans=()):
        if commonname.count('.') >= 2 and [len(x) for x in reversed(commonname.split('.'))] > [2, 4]:
            commonname = '.'+commonname.partition('.')[-1]
        certfile = os.path.join(CertUtil.ca_certdir, commonname + '.crt')
        if os.path.exists(certfile):
            return certfile
        elif OpenSSL is None:
            return CertUtil.ca_keyfile
        else:
            with CertUtil.ca_lock:
                if os.path.exists(certfile):
                    return certfile
                return CertUtil._get_cert(commonname, sans)

    @staticmethod
    def import_ca(certfile):
        commonname = os.path.splitext(os.path.basename(certfile))[0]
        sha1digest = 'AB:70:2C:DF:18:EB:E8:B4:38:C5:28:69:CD:4A:5D:EF:48:B4:0E:33'
        if OpenSSL:
            try:
                with open(certfile, 'rb') as fp:
                    x509 = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, fp.read())
                    commonname = next(v.decode() for k, v in x509.get_subject().get_components() if k == b'O')
                    sha1digest = x509.digest('sha1')
            except StandardError as e:
                logging.error('load_certificate(certfile=%r) failed:%s', certfile, e)
        if sys.platform.startswith('win'):
            import ctypes
            with open(certfile, 'rb') as fp:
                certdata = fp.read()
                if certdata.startswith(b'-----'):
                    begin = b'-----BEGIN CERTIFICATE-----'
                    end = b'-----END CERTIFICATE-----'
                    certdata = base64.b64decode(b''.join(certdata[certdata.find(begin)+len(begin):certdata.find(end)].strip().splitlines()))
                crypt32 = ctypes.WinDLL(b'crypt32.dll'.decode())
                store_handle = crypt32.CertOpenStore(10, 0, 0, 0x4000 | 0x20000, b'ROOT'.decode())
                if not store_handle:
                    return -1
                X509_ASN_ENCODING = 0x00000001
                CERT_FIND_HASH = 0x10000

                class CRYPT_HASH_BLOB(ctypes.Structure):
                    _fields_ = [('cbData', ctypes.c_ulong), ('pbData', ctypes.c_char_p)]
                crypt_hash = CRYPT_HASH_BLOB(20, binascii.a2b_hex(sha1digest.replace(':', '')))
                crypt_handle = crypt32.CertFindCertificateInStore(store_handle, X509_ASN_ENCODING, 0, CERT_FIND_HASH, ctypes.byref(crypt_hash), None)
                if crypt_handle:
                    crypt32.CertFreeCertificateContext(crypt_handle)
                    return 0
                ret = crypt32.CertAddEncodedCertificateToStore(store_handle, 0x1, certdata, len(certdata), 4, None)
                crypt32.CertCloseStore(store_handle, 0)
                del crypt32
                return 0 if ret else -1
        elif sys.platform == 'darwin':
            return os.system(('security find-certificate -a -c "%s" | grep "%s" >/dev/null || security add-trusted-cert -d -r trustRoot -k "/Library/Keychains/System.keychain" "%s"' % (commonname, commonname, certfile.decode('utf-8'))).encode('utf-8'))
        elif sys.platform.startswith('linux'):
            import platform
            platform_distname = platform.dist()[0]
            if platform_distname == 'Ubuntu':
                pemfile = "/etc/ssl/certs/%s.pem" % commonname
                new_certfile = "/usr/local/share/ca-certificates/%s.crt" % commonname
                if not os.path.exists(pemfile):
                    return os.system('cp "%s" "%s" && update-ca-certificates' % (certfile, new_certfile))
            elif any(os.path.isfile('%s/certutil' % x) for x in os.environ['PATH'].split(os.pathsep)):
                return os.system('certutil -L -d sql:$HOME/.pki/nssdb | grep "%s" || certutil -d sql:$HOME/.pki/nssdb -A -t "C,," -n "%s" -i "%s"' % (commonname, commonname, certfile))
            else:
                logging.warning('please install *libnss3-tools* package to import GoAgent root ca')
        return 0

    @staticmethod
    def check_ca():
        #Check CA exists
        capath = os.path.join(os.path.dirname(os.path.abspath(__file__)), CertUtil.ca_keyfile)
        certdir = os.path.join(os.path.dirname(os.path.abspath(__file__)), CertUtil.ca_certdir)
        if not os.path.exists(capath):
            if not OpenSSL:
                logging.critical('CA.key is not exist and OpenSSL is disabled, ABORT!')
                sys.exit(-1)
            if os.path.exists(certdir):
                if os.path.isdir(certdir):
                    any(os.remove(x) for x in glob.glob(certdir+'/*.crt')+glob.glob(certdir+'/.*.crt'))
                else:
                    os.remove(certdir)
                    os.mkdir(certdir)
            CertUtil.dump_ca()
        if glob.glob('%s/*.key' % CertUtil.ca_certdir):
            for filename in glob.glob('%s/*.key' % CertUtil.ca_certdir):
                try:
                    os.remove(filename)
                    os.remove(os.path.splitext(filename)[0]+'.crt')
                except EnvironmentError:
                    pass
        #Check CA imported
        if CertUtil.import_ca(capath) != 0:
            logging.warning('install root certificate failed, Please run as administrator/root/sudo')
        #Check Certs Dir
        if not os.path.exists(certdir):
            os.makedirs(certdir)
class DetectMobileBrowser:
    """detect mobile function from http://detectmobilebrowsers.com"""
    regex_match_a = re.compile(r"(android|bb\\d+|meego).+mobile|avantgo|bada\\/|blackberry|blazer|compal|elaine|fennec|hiptop|iemobile|ip(hone|od)|iris|kindle|lge |maemo|midp|mmp|mobile.+firefox|netfront|opera m(ob|in)i|palm( os)?|phone|p(ixi|re)\\/|plucker|pocket|psp|series(4|6)0|symbian|treo|up\\.(browser|link)|vodafone|wap|windows (ce|phone)|xda|xiino", re.I|re.M).search
    regex_match_b = re.compile(r"1207|6310|6590|3gso|4thp|50[1-6]i|770s|802s|a wa|abac|ac(er|oo|s\\-)|ai(ko|rn)|al(av|ca|co)|amoi|an(ex|ny|yw)|aptu|ar(ch|go)|as(te|us)|attw|au(di|\\-m|r |s )|avan|be(ck|ll|nq)|bi(lb|rd)|bl(ac|az)|br(e|v)w|bumb|bw\\-(n|u)|c55\\/|capi|ccwa|cdm\\-|cell|chtm|cldc|cmd\\-|co(mp|nd)|craw|da(it|ll|ng)|dbte|dc\\-s|devi|dica|dmob|do(c|p)o|ds(12|\\-d)|el(49|ai)|em(l2|ul)|er(ic|k0)|esl8|ez([4-7]0|os|wa|ze)|fetc|fly(\\-|_)|g1 u|g560|gene|gf\\-5|g\\-mo|go(\\.w|od)|gr(ad|un)|haie|hcit|hd\\-(m|p|t)|hei\\-|hi(pt|ta)|hp( i|ip)|hs\\-c|ht(c(\\-| |_|a|g|p|s|t)|tp)|hu(aw|tc)|i\\-(20|go|ma)|i230|iac( |\\-|\\/)|ibro|idea|ig01|ikom|im1k|inno|ipaq|iris|ja(t|v)a|jbro|jemu|jigs|kddi|keji|kgt( |\\/)|klon|kpt |kwc\\-|kyo(c|k)|le(no|xi)|lg( g|\\/(k|l|u)|50|54|\\-[a-w])|libw|lynx|m1\\-w|m3ga|m50\\/|ma(te|ui|xo)|mc(01|21|ca)|m\\-cr|me(rc|ri)|mi(o8|oa|ts)|mmef|mo(01|02|bi|de|do|t(\\-| |o|v)|zz)|mt(50|p1|v )|mwbp|mywa|n10[0-2]|n20[2-3]|n30(0|2)|n50(0|2|5)|n7(0(0|1)|10)|ne((c|m)\\-|on|tf|wf|wg|wt)|nok(6|i)|nzph|o2im|op(ti|wv)|oran|owg1|p800|pan(a|d|t)|pdxg|pg(13|\\-([1-8]|c))|phil|pire|pl(ay|uc)|pn\\-2|po(ck|rt|se)|prox|psio|pt\\-g|qa\\-a|qc(07|12|21|32|60|\\-[2-7]|i\\-)|qtek|r380|r600|raks|rim9|ro(ve|zo)|s55\\/|sa(ge|ma|mm|ms|ny|va)|sc(01|h\\-|oo|p\\-)|sdk\\/|se(c(\\-|0|1)|47|mc|nd|ri)|sgh\\-|shar|sie(\\-|m)|sk\\-0|sl(45|id)|sm(al|ar|b3|it|t5)|so(ft|ny)|sp(01|h\\-|v\\-|v )|sy(01|mb)|t2(18|50)|t6(00|10|18)|ta(gt|lk)|tcl\\-|tdg\\-|tel(i|m)|tim\\-|t\\-mo|to(pl|sh)|ts(70|m\\-|m3|m5)|tx\\-9|up(\\.b|g1|si)|utst|v400|v750|veri|vi(rg|te)|vk(40|5[0-3]|\\-v)|vm40|voda|vulc|vx(52|53|60|61|70|80|81|83|85|98)|w3c(\\-| )|webc|whit|wi(g |nc|nw)|wmlb|wonu|x700|yas\\-|your|zeto|zte\\-", re.I|re.M).search

    @staticmethod
    def detect(user_agent):
        return DetectMobileBrowser.regex_match_a(user_agent) or DetectMobileBrowser.regex_match_b(user_agent)
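# SSLConnection wraps OpenSSL.SSL.Connection and turns its non-blocking WantRead/WantWrite
# errors into select() waits, so callers can use it like an ordinary blocking socket.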
class SSLConnection(object):
    """OpenSSL Connection Wrapper"""

    def __init__(self, context, sock):
        self._context = context
        self._sock = sock
        self._connection = OpenSSL.SSL.Connection(context, sock)
        self._makefile_refs = 0

    def __getattr__(self, attr):
        if attr not in ('_context', '_sock', '_connection', '_makefile_refs'):
            return getattr(self._connection, attr)

    def __wait_sock_io(self, sock, io_func, *args, **kwargs):
        timeout = self._sock.gettimeout() or 0.1
        fd = self._sock.fileno()
        while True:
            try:
                return io_func(*args, **kwargs)
            except (OpenSSL.SSL.WantReadError, OpenSSL.SSL.WantX509LookupError):
                sys.exc_clear()
                _, _, errors = select.select([fd], [], [fd], timeout)
                if errors:
                    break
            except OpenSSL.SSL.WantWriteError:
                sys.exc_clear()
                _, _, errors = select.select([], [fd], [fd], timeout)
                if errors:
                    break

    def accept(self):
        sock, addr = self._sock.accept()
        client = OpenSSL.SSL.Connection(sock._context, sock)
        return client, addr

    def do_handshake(self):
        return self.__wait_sock_io(self._sock, self._connection.do_handshake)

    def connect(self, *args, **kwargs):
        return self.__wait_sock_io(self._sock, self._connection.connect, *args, **kwargs)

    def send(self, data, flags=0):
        try:
            return self.__wait_sock_io(self._sock, self._connection.send, data, flags)
        except OpenSSL.SSL.SysCallError as e:
            if e[0] == -1 and not data:
                # errors when writing empty strings are expected and can be ignored
                return 0
            raise

    def recv(self, bufsiz, flags=0):
        pending = self._connection.pending()
        if pending:
            return self._connection.recv(min(pending, bufsiz))
        try:
            return self.__wait_sock_io(self._sock, self._connection.recv, bufsiz, flags)
        except OpenSSL.SSL.ZeroReturnError:
            return ''

    def read(self, bufsiz, flags=0):
        return self.recv(bufsiz, flags)

    def write(self, buf, flags=0):
        return self.sendall(buf, flags)

    def close(self):
        if self._makefile_refs < 1:
            self._connection = None
            if self._sock:
                socket.socket.close(self._sock)
        else:
            self._makefile_refs -= 1

    def makefile(self, mode='r', bufsize=-1):
        self._makefile_refs += 1
        return socket._fileobject(self, mode, bufsize, close=True)
class ProxyUtil(object):
    """ProxyUtil module, based on urllib2"""

    @staticmethod
    def parse_proxy(proxy):
        return urllib2._parse_proxy(proxy)

    @staticmethod
    def get_system_proxy():
        proxies = urllib2.getproxies()
        return proxies.get('https') or proxies.get('http') or {}

    @staticmethod
    def get_listen_ip():
        listen_ip = '127.0.0.1'
        sock = None
        try:
            sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
            sock.connect(('8.8.8.8', 53))
            listen_ip = sock.getsockname()[0]
        except socket.error:
            pass
        finally:
            if sock:
                sock.close()
        return listen_ip
def parse_hostport(host, default_port=80):
    m = re.match(r'(.+)[#](\d+)$', host)
    if m:
        return m.group(1).strip('[]'), int(m.group(2))
    else:
        return host.strip('[]'), default_port
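# Example: parse_hostport('8.8.8.8#53') -> ('8.8.8.8', 53); a host without a '#port'
# suffix falls back to default_port.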
def dnslib_resolve_over_udp(query, dnsservers, timeout, **kwargs):
    """
    http://gfwrev.blogspot.com/2009/11/gfwdns.html
    http://zh.wikipedia.org/wiki/%E5%9F%9F%E5%90%8D%E6%9C%8D%E5%8A%A1%E5%99%A8%E7%BC%93%E5%AD%98%E6%B1%A1%E6%9F%93
    http://support.microsoft.com/kb/241352
    """
    if not isinstance(query, (basestring, dnslib.DNSRecord)):
        raise TypeError('query argument requires string/DNSRecord')
    blacklist = kwargs.get('blacklist', ())
    turstservers = kwargs.get('turstservers', ())
    dns_v4_servers = [x for x in dnsservers if ':' not in x]
    dns_v6_servers = [x for x in dnsservers if ':' in x]
    sock_v4 = sock_v6 = None
    socks = []
    if dns_v4_servers:
        sock_v4 = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        socks.append(sock_v4)
    if dns_v6_servers:
        sock_v6 = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM)
        socks.append(sock_v6)
    timeout_at = time.time() + timeout
    try:
        for _ in xrange(4):
            try:
                for dnsserver in dns_v4_servers:
                    if isinstance(query, basestring):
                        query = dnslib.DNSRecord(q=dnslib.DNSQuestion(query))
                    query_data = query.pack()
                    sock_v4.sendto(query_data, parse_hostport(dnsserver, 53))
                for dnsserver in dns_v6_servers:
                    if isinstance(query, basestring):
                        query = dnslib.DNSRecord(q=dnslib.DNSQuestion(query, qtype=dnslib.QTYPE.AAAA))
                    query_data = query.pack()
                    sock_v6.sendto(query_data, parse_hostport(dnsserver, 53))
                while time.time() < timeout_at:
                    ins, _, _ = select.select(socks, [], [], 0.1)
                    for sock in ins:
                        reply_data, reply_address = sock.recvfrom(512)
                        reply_server = reply_address[0]
                        record = dnslib.DNSRecord.parse(reply_data)
                        iplist = [str(x.rdata) for x in record.rr if x.rtype in (1, 28, 255)]
                        if any(x in blacklist for x in iplist):
                            logging.warning('query=%r dnsservers=%r record bad iplist=%r', query, dnsservers, iplist)
                        elif record.header.rcode and not iplist and reply_server in turstservers:
                            logging.info('query=%r trust reply_server=%r record rcode=%s', query, reply_server, record.header.rcode)
                            return record
                        elif iplist:
                            logging.debug('query=%r reply_server=%r record iplist=%s', query, reply_server, iplist)
                            return record
                        else:
                            logging.debug('query=%r reply_server=%r record null iplist=%s', query, reply_server, iplist)
                            continue
            except socket.error as e:
                logging.warning('handle dns query=%s socket: %r', query, e)
        raise socket.gaierror(11004, 'getaddrinfo %r from %r failed' % (query, dnsservers))
    finally:
        for sock in socks:
            sock.close()
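# The UDP resolver keeps listening after the first reply: answers whose IPs are on the
# blacklist are logged and ignored as likely injected/poisoned responses, and only a clean
# answer (or an empty error reply from a trusted server) is returned.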
def dnslib_resolve_over_tcp(query, dnsservers, timeout, **kwargs):
    """dns query over tcp"""
    if not isinstance(query, (basestring, dnslib.DNSRecord)):
        raise TypeError('query argument requires string/DNSRecord')
    blacklist = kwargs.get('blacklist', ())

    def do_resolve(query, dnsserver, timeout, queobj):
        if isinstance(query, basestring):
            qtype = dnslib.QTYPE.AAAA if ':' in dnsserver else dnslib.QTYPE.A
            query = dnslib.DNSRecord(q=dnslib.DNSQuestion(query, qtype=qtype))
        query_data = query.pack()
        sock_family = socket.AF_INET6 if ':' in dnsserver else socket.AF_INET
        sock = socket.socket(sock_family)
        rfile = None
        try:
            sock.settimeout(timeout or None)
            sock.connect(parse_hostport(dnsserver, 53))
            sock.send(struct.pack('>h', len(query_data)) + query_data)
            rfile = sock.makefile('r', 1024)
            reply_data_length = rfile.read(2)
            if len(reply_data_length) < 2:
                raise socket.gaierror(11004, 'getaddrinfo %r from %r failed' % (query, dnsserver))
            reply_data = rfile.read(struct.unpack('>h', reply_data_length)[0])
            record = dnslib.DNSRecord.parse(reply_data)
            iplist = [str(x.rdata) for x in record.rr if x.rtype in (1, 28, 255)]
            if any(x in blacklist for x in iplist):
                logging.debug('query=%r dnsserver=%r record bad iplist=%r', query, dnsserver, iplist)
                raise socket.gaierror(11004, 'getaddrinfo %r from %r failed' % (query, dnsserver))
            else:
                logging.debug('query=%r dnsserver=%r record iplist=%s', query, dnsserver, iplist)
                queobj.put(record)
        except socket.error as e:
            logging.debug('query=%r dnsserver=%r failed %r', query, dnsserver, e)
            queobj.put(e)
        finally:
            if rfile:
                rfile.close()
            sock.close()
    queobj = Queue.Queue()
    for dnsserver in dnsservers:
        thread.start_new_thread(do_resolve, (query, dnsserver, timeout, queobj))
    for i in range(len(dnsservers)):
        try:
            result = queobj.get(timeout)
        except Queue.Empty:
            raise socket.gaierror(11004, 'getaddrinfo %r from %r failed' % (query, dnsservers))
        if result and not isinstance(result, Exception):
            return result
        elif i == len(dnsservers) - 1:
            logging.warning('dnslib_resolve_over_tcp %r with %s return %r', query, dnsservers, result)
    raise socket.gaierror(11004, 'getaddrinfo %r from %r failed' % (query, dnsservers))
def dnslib_record2iplist(record):
    """convert dnslib.DNSRecord to iplist"""
    assert isinstance(record, dnslib.DNSRecord)
    iplist = [x for x in (str(r.rdata) for r in record.rr) if re.match(r'^\d+\.\d+\.\d+\.\d+$', x) or ':' in x]
    return iplist


def get_dnsserver_list():
    if os.name == 'nt':
        import ctypes, ctypes.wintypes, struct, socket
        DNS_CONFIG_DNS_SERVER_LIST = 6
        buf = ctypes.create_string_buffer(2048)
        ctypes.windll.dnsapi.DnsQueryConfig(DNS_CONFIG_DNS_SERVER_LIST, 0, None, None, ctypes.byref(buf), ctypes.byref(ctypes.wintypes.DWORD(len(buf))))
        ipcount = struct.unpack('I', buf[0:4])[0]
        iplist = [socket.inet_ntoa(buf[i:i+4]) for i in xrange(4, ipcount*4+4, 4)]
        return iplist
    elif os.path.isfile('/etc/resolv.conf'):
        with open('/etc/resolv.conf', 'rb') as fp:
            return re.findall(r'(?m)^nameserver\s+(\S+)', fp.read())
    else:
        logging.warning("get_dnsserver_list failed: unsupport platform '%s-%s'", sys.platform, os.name)
        return []
def spawn_later(seconds, target, *args, **kwargs):
    def wrap(*args, **kwargs):
        __import__('time').sleep(seconds)
        return target(*args, **kwargs)
    return __import__('thread').start_new_thread(wrap, args, kwargs)
def is_clienthello(data):
    if len(data) < 20:
        return False
    if data.startswith('\x16\x03'):
        # TLSv12/TLSv11/TLSv1/SSLv3
        length, = struct.unpack('>h', data[3:5])
        return len(data) == 5 + length
    elif data[0] == '\x80' and data[2:4] == '\x01\x03':
        # SSLv23
        return len(data) == 2 + ord(data[1])
    else:
        return False


def extract_sni_name(packet):
    if packet.startswith('\x16\x03'):
        stream = io.BytesIO(packet)
        stream.read(0x2b)
        session_id_length = ord(stream.read(1))
        stream.read(session_id_length)
        cipher_suites_length, = struct.unpack('>h', stream.read(2))
        stream.read(cipher_suites_length+2)
        extensions_length, = struct.unpack('>h', stream.read(2))
        extensions = {}
        while True:
            data = stream.read(2)
            if not data:
                break
            etype, = struct.unpack('>h', data)
            elen, = struct.unpack('>h', stream.read(2))
            edata = stream.read(elen)
            if etype == 0:
                server_name = edata[5:]
                return server_name
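# Note: extract_sni_name assumes a TLS ClientHello laid out as 5-byte record header +
# 4-byte handshake header + 2-byte version + 32-byte random (the 0x2b skip), then the
# session id, the cipher suites, a single compression method (the '+2'), and finally the
# extension list; extension type 0 is server_name, whose hostname starts 5 bytes into the
# extension body.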
class URLFetch(object):
    """URLFetch for gae/php fetchservers"""
    skip_headers = frozenset(['Vary', 'Via', 'X-Forwarded-For', 'Proxy-Authorization', 'Proxy-Connection', 'Upgrade', 'X-Chrome-Variations', 'Connection', 'Cache-Control'])

    def __init__(self, fetchserver, create_http_request):
        assert isinstance(fetchserver, basestring) and callable(create_http_request)
        self.fetchserver = fetchserver
        self.create_http_request = create_http_request

    def fetch(self, method, url, headers, body, timeout, **kwargs):
        if '.appspot.com/' in self.fetchserver:
            response = self.__gae_fetch(method, url, headers, body, timeout, **kwargs)
            response.app_header_parsed = True
        else:
            response = self.__php_fetch(method, url, headers, body, timeout, **kwargs)
            response.app_header_parsed = False
        return response

    def __gae_fetch(self, method, url, headers, body, timeout, **kwargs):
        # deflate = lambda x:zlib.compress(x)[2:-4]
        rc4crypt = lambda s, k: RC4Cipher(k).encrypt(s) if k else s
        if body:
            if len(body) < 10 * 1024 * 1024 and 'Content-Encoding' not in headers:
                zbody = zlib.compress(body)[2:-4]
                if len(zbody) < len(body):
                    body = zbody
                    headers['Content-Encoding'] = 'deflate'
            headers['Content-Length'] = str(len(body))
        # GAE donot allow set `Host` header
        if 'Host' in headers:
            del headers['Host']
        metadata = 'G-Method:%s\nG-Url:%s\n%s' % (method, url, ''.join('G-%s:%s\n' % (k, v) for k, v in kwargs.items() if v))
        skip_headers = self.skip_headers
        metadata += ''.join('%s:%s\n' % (k.title(), v) for k, v in headers.items() if k not in skip_headers)
        # prepare GAE request
        request_fetchserver = self.fetchserver
        request_method = 'POST'
        request_headers = {}
        if common.GAE_OBFUSCATE:
            request_method = 'GET'
            query_string = base64.b64encode(zlib.compress(metadata + '\n\n' + (body or ''))[2:-4]).strip()
            request_fetchserver += '?' + query_string
            if common.GAE_PAGESPEED:
                request_fetchserver = re.sub(r'^(\w+://)', r'\g<1>1-ps.googleusercontent.com/h/', request_fetchserver)
        else:
            metadata = zlib.compress(metadata)[2:-4]
            body = '%s%s%s' % (struct.pack('!h', len(metadata)), metadata, body)
            if 'rc4' in common.GAE_OPTIONS:
                request_headers['X-GOA-Options'] = 'rc4'
                body = rc4crypt(body, kwargs.get('password'))
            request_headers['Content-Length'] = str(len(body))
        # post data
        need_crlf = 0 if common.GAE_MODE == 'https' else 1
        need_validate = common.GAE_VALIDATE
        cache_key = '%s:%d' % (common.HOST_POSTFIX_MAP['.appspot.com'], 443 if common.GAE_MODE == 'https' else 80)
        response = self.create_http_request(request_method, request_fetchserver, request_headers, body, timeout, crlf=need_crlf, validate=need_validate, cache_key=cache_key)
        response.app_status = response.status
        response.app_options = response.getheader('X-GOA-Options', '')
        if response.status != 200:
            return response
        data = response.read(4)
        if len(data) < 4:
            response.status = 502
            response.fp = io.BytesIO(b'connection aborted. too short leadbyte data=' + data)
            response.read = response.fp.read
            return response
        response.status, headers_length = struct.unpack('!hh', data)
        data = response.read(headers_length)
        if len(data) < headers_length:
            response.status = 502
            response.fp = io.BytesIO(b'connection aborted. too short headers data=' + data)
            response.read = response.fp.read
            return response
        if 'rc4' not in response.app_options:
            response.msg = httplib.HTTPMessage(io.BytesIO(zlib.decompress(data, -zlib.MAX_WBITS)))
        else:
            response.msg = httplib.HTTPMessage(io.BytesIO(zlib.decompress(rc4crypt(data, kwargs.get('password')), -zlib.MAX_WBITS)))
            if kwargs.get('password') and response.fp:
                response.fp = CipherFileObject(response.fp, RC4Cipher(kwargs['password']))
        return response

    def __php_fetch(self, method, url, headers, body, timeout, **kwargs):
        if body:
            if len(body) < 10 * 1024 * 1024 and 'Content-Encoding' not in headers:
                zbody = zlib.compress(body)[2:-4]
                if len(zbody) < len(body):
                    body = zbody
                    headers['Content-Encoding'] = 'deflate'
            headers['Content-Length'] = str(len(body))
        skip_headers = self.skip_headers
        metadata = 'G-Method:%s\nG-Url:%s\n%s%s' % (method, url, ''.join('G-%s:%s\n' % (k, v) for k, v in kwargs.items() if v), ''.join('%s:%s\n' % (k, v) for k, v in headers.items() if k not in skip_headers))
        metadata = zlib.compress(metadata)[2:-4]
        app_body = b''.join((struct.pack('!h', len(metadata)), metadata, body))
        app_headers = {'Content-Length': len(app_body), 'Content-Type': 'application/octet-stream'}
        fetchserver = '%s?%s' % (self.fetchserver, random.random())
        crlf = 0
        cache_key = '%s//:%s' % urlparse.urlsplit(fetchserver)[:2]
        response = self.create_http_request('POST', fetchserver, app_headers, app_body, timeout, crlf=crlf, cache_key=cache_key)
        if not response:
            raise socket.error(errno.ECONNRESET, 'urlfetch %r return None' % url)
        if response.status >= 400:
            return response
        response.app_status = response.status
        need_decrypt = kwargs.get('password') and response.app_status == 200 and response.getheader('Content-Type', '') == 'image/gif' and response.fp
        if need_decrypt:
            response.fp = CipherFileObject(response.fp, XORCipher(kwargs['password'][0]))
        return response
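# Wire format: URLFetch flattens the original request into a 'G-Method/G-Url/...' metadata
# block, deflates it, prefixes it with its big-endian length (struct.pack('!h', ...)) and
# POSTs metadata+body (optionally RC4-encrypted) to the GAE/PHP fetchserver, whose
# server-side script replays the request and wraps the real response in a similar
# status/headers/body envelope.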
class BaseProxyHandlerFilter(object):
    """base proxy handler filter"""
    def filter(self, handler):
        raise NotImplementedError


class SimpleProxyHandlerFilter(BaseProxyHandlerFilter):
    """simple proxy handler filter"""
    def filter(self, handler):
        if handler.command == 'CONNECT':
            return [handler.FORWARD, handler.host, handler.port, handler.connect_timeout]
        else:
            return [handler.DIRECT, {}]


class AuthFilter(BaseProxyHandlerFilter):
    """authorization filter"""
    auth_info = "Proxy authentication required"""
    white_list = set(['127.0.0.1'])

    def __init__(self, username, password):
        self.username = username
        self.password = password

    def check_auth_header(self, auth_header):
        method, _, auth_data = auth_header.partition(' ')
        if method == 'Basic':
            username, _, password = base64.b64decode(auth_data).partition(':')
            if username == self.username and password == self.password:
                return True
        return False

    def filter(self, handler):
        if self.white_list and handler.client_address[0] in self.white_list:
            return None
        auth_header = handler.headers.get('Proxy-Authorization') or getattr(handler, 'auth_header', None)
        if auth_header and self.check_auth_header(auth_header):
            handler.auth_header = auth_header
        else:
            headers = {'Access-Control-Allow-Origin': '*',
                       'Proxy-Authenticate': 'Basic realm="%s"' % self.auth_info,
                       'Content-Length': '0',
                       'Connection': 'keep-alive'}
            return [handler.MOCK, 407, headers, '']
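# A handler filter returns None to pass, or an action list [method, arg, ...] whose first
# element is one of the handler's MOCK/STRIP/FORWARD/DIRECT/URLFETCH callables; do_METHOD
# pops that element off and calls it with the remaining items as arguments.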
class SimpleProxyHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    """SimpleProxyHandler for GoAgent 3.x"""

    protocol_version = 'HTTP/1.1'
    ssl_version = ssl.PROTOCOL_SSLv23
    disable_transport_ssl = True
    scheme = 'http'
    skip_headers = frozenset(['Vary', 'Via', 'X-Forwarded-For', 'Proxy-Authorization', 'Proxy-Connection', 'Upgrade', 'X-Chrome-Variations', 'Connection', 'Cache-Control'])
    bufsize = 256 * 1024
    max_timeout = 4
    connect_timeout = 4
    first_run_lock = threading.Lock()
    handler_filters = [SimpleProxyHandlerFilter()]
    sticky_filter = None

    def finish(self):
        """make python2 BaseHTTPRequestHandler happy"""
        try:
            BaseHTTPServer.BaseHTTPRequestHandler.finish(self)
        except NetWorkIOError as e:
            if e[0] not in (errno.ECONNABORTED, errno.ECONNRESET, errno.EPIPE):
                raise

    def address_string(self):
        return '%s:%s' % self.client_address[:2]

    def send_response(self, code, message=None):
        if message is None:
            if code in self.responses:
                message = self.responses[code][0]
            else:
                message = ''
        if self.request_version != 'HTTP/0.9':
            self.wfile.write('%s %d %s\r\n' % (self.protocol_version, code, message))

    def send_header(self, keyword, value):
        """Send a MIME header."""
        base_send_header = BaseHTTPServer.BaseHTTPRequestHandler.send_header
        keyword = keyword.title()
        if keyword == 'Set-Cookie':
            for cookie in re.split(r', (?=[^ =]+(?:=|$))', value):
                base_send_header(self, keyword, cookie)
        elif keyword == 'Content-Disposition' and '"' not in value:
            value = re.sub(r'filename=([^"\']+)', 'filename="\\1"', value)
            base_send_header(self, keyword, value)
        else:
            base_send_header(self, keyword, value)

    def setup(self):
        if isinstance(self.__class__.first_run, collections.Callable):
            try:
                with self.__class__.first_run_lock:
                    if isinstance(self.__class__.first_run, collections.Callable):
                        self.first_run()
                        self.__class__.first_run = None
            except StandardError as e:
                logging.exception('%s.first_run() return %r', self.__class__, e)
        self.__class__.setup = BaseHTTPServer.BaseHTTPRequestHandler.setup
        self.__class__.do_CONNECT = self.__class__.do_METHOD
        self.__class__.do_GET = self.__class__.do_METHOD
        self.__class__.do_PUT = self.__class__.do_METHOD
        self.__class__.do_POST = self.__class__.do_METHOD
        self.__class__.do_HEAD = self.__class__.do_METHOD
        self.__class__.do_DELETE = self.__class__.do_METHOD
        self.__class__.do_OPTIONS = self.__class__.do_METHOD
        self.setup()

    def handle_one_request(self):
        if not self.disable_transport_ssl and self.scheme == 'http':
            leadbyte = self.connection.recv(1, socket.MSG_PEEK)
            if leadbyte in ('\x80', '\x16'):
                server_name = ''
                if leadbyte == '\x16':
                    for _ in xrange(2):
                        leaddata = self.connection.recv(1024, socket.MSG_PEEK)
                        if is_clienthello(leaddata):
                            try:
                                server_name = extract_sni_name(leaddata)
                            finally:
                                break
                try:
                    certfile = CertUtil.get_cert(server_name or 'www.google.com')
                    ssl_sock = ssl.wrap_socket(self.connection, ssl_version=self.ssl_version, keyfile=certfile, certfile=certfile, server_side=True)
                except StandardError as e:
                    if e.args[0] not in (errno.ECONNABORTED, errno.ECONNRESET):
                        logging.exception('ssl.wrap_socket(self.connection=%r) failed: %s', self.connection, e)
                    return
                self.connection = ssl_sock
                self.rfile = self.connection.makefile('rb', self.bufsize)
                self.wfile = self.connection.makefile('wb', 0)
                self.scheme = 'https'
        return BaseHTTPServer.BaseHTTPRequestHandler.handle_one_request(self)

    def first_run(self):
        pass

    def gethostbyname2(self, hostname):
        return socket.gethostbyname_ex(hostname)[-1]

    def create_tcp_connection(self, hostname, port, timeout, **kwargs):
        return socket.create_connection((hostname, port), timeout)

    def create_ssl_connection(self, hostname, port, timeout, **kwargs):
        sock = self.create_tcp_connection(hostname, port, timeout, **kwargs)
        ssl_sock = ssl.wrap_socket(sock, ssl_version=self.ssl_version)
        return ssl_sock

    def create_http_request(self, method, url, headers, body, timeout, **kwargs):
        scheme, netloc, path, query, _ = urlparse.urlsplit(url)
        if netloc.rfind(':') <= netloc.rfind(']'):
            # no port number
            host = netloc
            port = 443 if scheme == 'https' else 80
        else:
            host, _, port = netloc.rpartition(':')
            port = int(port)
        if query:
            path += '?' + query
        if 'Host' not in headers:
            headers['Host'] = host
        if body and 'Content-Length' not in headers:
            headers['Content-Length'] = str(len(body))
        ConnectionType = httplib.HTTPSConnection if scheme == 'https' else httplib.HTTPConnection
        connection = ConnectionType(netloc, timeout=timeout)
        connection.request(method, path, body=body, headers=headers)
        response = connection.getresponse()
        return response

    def create_http_request_withserver(self, fetchserver, method, url, headers, body, timeout, **kwargs):
        return URLFetch(fetchserver, self.create_http_request).fetch(method, url, headers, body, timeout, **kwargs)

    def handle_urlfetch_error(self, fetchserver, response):
        pass

    def handle_urlfetch_response_close(self, fetchserver, response):
        pass

    def parse_header(self):
        if self.command == 'CONNECT':
            netloc = self.path
        elif self.path[0] == '/':
            netloc = self.headers.get('Host', 'localhost')
            self.path = '%s://%s%s' % (self.scheme, netloc, self.path)
        else:
            netloc = urlparse.urlsplit(self.path).netloc
        m = re.match(r'^(.+):(\d+)$', netloc)
        if m:
            self.host = m.group(1).strip('[]')
            self.port = int(m.group(2))
        else:
            self.host = netloc
            self.port = 443 if self.scheme == 'https' else 80

    def forward_socket(self, local, remote, timeout):
        try:
            tick = 1
            bufsize = self.bufsize
            timecount = timeout
            while 1:
                timecount -= tick
                if timecount <= 0:
                    break
                (ins, _, errors) = select.select([local, remote], [], [local, remote], tick)
                if errors:
                    break
                for sock in ins:
                    data = sock.recv(bufsize)
                    if not data:
                        break
                    if sock is remote:
                        local.sendall(data)
                        timecount = timeout
                    else:
                        remote.sendall(data)
                        timecount = timeout
        except socket.timeout:
            pass
        except NetWorkIOError as e:
            if e.args[0] not in (errno.ECONNABORTED, errno.ECONNRESET, errno.ENOTCONN, errno.EPIPE):
                raise
            if e.args[0] in (errno.EBADF,):
                return
        finally:
            for sock in (remote, local):
                try:
                    sock.close()
                except StandardError:
                    pass
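    # The upper-case methods below are the terminal actions a handler filter can select:
    # MOCK fabricates a local response, STRIP unwraps a CONNECT with a forged certificate,
    # FORWARD tunnels raw bytes, DIRECT relays plain HTTP, and URLFETCH proxies the request
    # through a remote fetchserver.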
    def MOCK(self, status, headers, content):
        """mock response"""
        logging.info('%s "MOCK %s %s %s" %d %d', self.address_string(), self.command, self.path, self.protocol_version, status, len(content))
        headers = dict((k.title(), v) for k, v in headers.items())
        if 'Transfer-Encoding' in headers:
            del headers['Transfer-Encoding']
        if 'Content-Length' not in headers:
            headers['Content-Length'] = len(content)
        if 'Connection' not in headers:
            headers['Connection'] = 'close'
        self.send_response(status)
        for key, value in headers.items():
            self.send_header(key, value)
        self.end_headers()
        self.wfile.write(content)

    def STRIP(self, do_ssl_handshake=True, sticky_filter=None):
        """strip connect"""
        certfile = CertUtil.get_cert(self.host)
        logging.info('%s "STRIP %s %s:%d %s" - -', self.address_string(), self.command, self.host, self.port, self.protocol_version)
        self.send_response(200)
        self.end_headers()
        if do_ssl_handshake:
            try:
                ssl_sock = ssl.wrap_socket(self.connection, ssl_version=self.ssl_version, keyfile=certfile, certfile=certfile, server_side=True)
            except StandardError as e:
                if e.args[0] not in (errno.ECONNABORTED, errno.ECONNRESET):
                    logging.exception('ssl.wrap_socket(self.connection=%r) failed: %s', self.connection, e)
                return
            self.connection = ssl_sock
            self.rfile = self.connection.makefile('rb', self.bufsize)
            self.wfile = self.connection.makefile('wb', 0)
            self.scheme = 'https'
        try:
            self.raw_requestline = self.rfile.readline(65537)
            if len(self.raw_requestline) > 65536:
                self.requestline = ''
                self.request_version = ''
                self.command = ''
                self.send_error(414)
                return
            if not self.raw_requestline:
                self.close_connection = 1
                return
            if not self.parse_request():
                return
        except NetWorkIOError as e:
            if e.args[0] not in (errno.ECONNABORTED, errno.ECONNRESET, errno.EPIPE):
                raise
        self.sticky_filter = sticky_filter
        try:
            self.do_METHOD()
        except NetWorkIOError as e:
            if e.args[0] not in (errno.ECONNABORTED, errno.ETIMEDOUT, errno.EPIPE):
                raise

    def FORWARD(self, hostname, port, timeout, kwargs={}):
        """forward socket"""
        do_ssl_handshake = kwargs.pop('do_ssl_handshake', False)
        local = self.connection
        remote = None
        self.send_response(200)
        self.end_headers()
        self.close_connection = 1
        data = local.recv(1024)
        if not data:
            local.close()
            return
        data_is_clienthello = is_clienthello(data)
        if data_is_clienthello:
            kwargs['client_hello'] = data
        max_retry = kwargs.get('max_retry', 3)
        for i in xrange(max_retry):
            try:
                if do_ssl_handshake:
                    remote = self.create_ssl_connection(hostname, port, timeout, **kwargs)
                else:
                    remote = self.create_tcp_connection(hostname, port, timeout, **kwargs)
                if not data_is_clienthello and remote and not isinstance(remote, Exception):
                    remote.sendall(data)
                break
            except StandardError as e:
                logging.exception('%s "FWD %s %s:%d %s" %r', self.address_string(), self.command, hostname, port, self.protocol_version, e)
                if hasattr(remote, 'close'):
                    remote.close()
                if i == max_retry - 1:
                    raise
        logging.info('%s "FWD %s %s:%d %s" - -', self.address_string(), self.command, hostname, port, self.protocol_version)
        if hasattr(remote, 'fileno'):
            # reset timeout default to avoid long http upload failure, but it will delay timeout retry :(
            remote.settimeout(None)
        del kwargs
        data = data_is_clienthello and getattr(remote, 'data', None)
        if data:
            del remote.data
            local.sendall(data)
        self.forward_socket(local, remote, self.max_timeout)

    def DIRECT(self, kwargs):
        method = self.command
        if 'url' in kwargs:
            url = kwargs.pop('url')
        elif self.path.lower().startswith(('http://', 'https://', 'ftp://')):
            url = self.path
        else:
            url = 'http://%s%s' % (self.headers['Host'], self.path)
        headers = dict((k.title(), v) for k, v in self.headers.items())
        body = self.body
        response = None
        try:
            response = self.create_http_request(method, url, headers, body, timeout=self.connect_timeout, **kwargs)
            logging.info('%s "DIRECT %s %s %s" %s %s', self.address_string(), self.command, url, self.protocol_version, response.status, response.getheader('Content-Length', '-'))
            response_headers = dict((k.title(), v) for k, v in response.getheaders())
            self.send_response(response.status)
            for key, value in response.getheaders():
                self.send_header(key, value)
            self.end_headers()
            if self.command == 'HEAD' or response.status in (204, 304):
                response.close()
                return
            need_chunked = 'Transfer-Encoding' in response_headers
            while True:
                data = response.read(8192)
                if not data:
                    if need_chunked:
                        self.wfile.write('0\r\n\r\n')
                    break
                if need_chunked:
                    self.wfile.write('%x\r\n' % len(data))
                self.wfile.write(data)
                if need_chunked:
                    self.wfile.write('\r\n')
                del data
        except (ssl.SSLError, socket.timeout, socket.error):
            if response:
                if response.fp and response.fp._sock:
                    response.fp._sock.close()
                response.close()
        finally:
            if response:
                response.close()

    def URLFETCH(self, fetchservers, max_retry=2, kwargs={}):
        """urlfetch from fetchserver"""
        method = self.command
        if self.path[0] == '/':
            url = '%s://%s%s' % (self.scheme, self.headers['Host'], self.path)
        elif self.path.lower().startswith(('http://', 'https://', 'ftp://')):
            url = self.path
        else:
            raise ValueError('URLFETCH %r is not a valid url' % self.path)
        headers = dict((k.title(), v) for k, v in self.headers.items())
        body = self.body
        response = None
        errors = []
        fetchserver = fetchservers[0]
        for i in xrange(max_retry):
            try:
                response = self.create_http_request_withserver(fetchserver, method, url, headers, body, timeout=60, **kwargs)
                if response.app_status < 400:
                    break
                else:
                    self.handle_urlfetch_error(fetchserver, response)
                    if i < max_retry - 1:
                        if len(fetchservers) > 1:
                            fetchserver = random.choice(fetchservers[1:])
                        logging.info('URLFETCH return %d, trying fetchserver=%r', response.app_status, fetchserver)
                        response.close()
            except StandardError as e:
                errors.append(e)
                logging.info('URLFETCH "%s %s" fetchserver=%r %r, retry...', method, url, fetchserver, e)
        if len(errors) == max_retry:
            if response and response.app_status >= 500:
                status = response.app_status
                headers = dict(response.getheaders())
                content = response.read()
                response.close()
            else:
                status = 502
                headers = {'Content-Type': 'text/html'}
                content = message_html('502 URLFetch failed', 'Local URLFetch %r failed' % url, '<br>'.join(repr(x) for x in errors))
            return self.MOCK(status, headers, content)
        logging.info('%s "URL %s %s %s" %s %s', self.address_string(), method, url, self.protocol_version, response.status, response.getheader('Content-Length', '-'))
        try:
            if response.status == 206:
                return RangeFetch(self, response, fetchservers, **kwargs).fetch()
            if response.app_header_parsed:
                self.close_connection = not response.getheader('Content-Length')
            self.send_response(response.status)
            for key, value in response.getheaders():
                if key.title() == 'Transfer-Encoding':
                    continue
                self.send_header(key, value)
            self.end_headers()
            bufsize = 8192
            while True:
                data = response.read(bufsize)
                if data:
                    self.wfile.write(data)
                if not data:
                    self.handle_urlfetch_response_close(fetchserver, response)
                    response.close()
                    break
                del data
        except NetWorkIOError as e:
            if e[0] in (errno.ECONNABORTED, errno.EPIPE) or 'bad write retry' in repr(e):
                return

    def do_METHOD(self):
        self.parse_header()
        self.body = self.rfile.read(int(self.headers['Content-Length'])) if 'Content-Length' in self.headers else ''
        if self.sticky_filter:
            action = self.sticky_filter.filter(self)
            if action:
                return action.pop(0)(*action)
        for handler_filter in self.handler_filters:
            action = handler_filter.filter(self)
            if action:
                return action.pop(0)(*action)
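# RangeFetch: when the fetchserver answers 206, the remaining byte range is cut into
# maxsize pieces that several worker threads download into a PriorityQueue, while fetch()
# writes the pieces back to the client strictly in order (expect_begin tracks the next
# offset to emit).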
  1166. class RangeFetch(object):
  1167. """Range Fetch Class"""
  1168. threads = 2
  1169. maxsize = 1024*1024*4
  1170. bufsize = 8192
  1171. waitsize = 1024*512
  1172. def __init__(self, handler, response, fetchservers, **kwargs):
  1173. self.handler = handler
  1174. self.url = handler.path
  1175. self.response = response
  1176. self.fetchservers = fetchservers
  1177. self.kwargs = kwargs
  1178. self._stopped = None
  1179. self._last_app_status = {}
  1180. self.expect_begin = 0
  1181. def fetch(self):
  1182. response_status = self.response.status
  1183. response_headers = dict((k.title(), v) for k, v in self.response.getheaders())
  1184. content_range = response_headers['Content-Range']
  1185. #content_length = response_headers['Content-Length']
  1186. start, end, length = tuple(int(x) for x in re.search(r'bytes (\d+)-(\d+)/(\d+)', content_range).group(1, 2, 3))
  1187. if start == 0:
  1188. response_status = 200
  1189. response_headers['Content-Length'] = str(length)
  1190. del response_headers['Content-Range']
  1191. else:
  1192. response_headers['Content-Range'] = 'bytes %s-%s/%s' % (start, end, length)
  1193. response_headers['Content-Length'] = str(length-start)
  1194. logging.info('>>>>>>>>>>>>>>> RangeFetch started(%r) %d-%d', self.url, start, end)
  1195. self.handler.send_response(response_status)
  1196. for key, value in response_headers.items():
  1197. self.handler.send_header(key, value)
  1198. self.handler.end_headers()
  1199. data_queue = Queue.PriorityQueue()
  1200. range_queue = Queue.PriorityQueue()
  1201. range_queue.put((start, end, self.response))
  1202. self.expect_begin = start
  1203. for begin in range(end+1, length, self.maxsize):
  1204. range_queue.put((begin, min(begin+self.maxsize-1, length-1), None))
  1205. for i in xrange(0, self.threads):
  1206. range_delay_size = i * self.maxsize
  1207. spawn_later(float(range_delay_size)/self.waitsize, self.__fetchlet, range_queue, data_queue, range_delay_size)
  1208. has_peek = hasattr(data_queue, 'peek')
  1209. peek_timeout = 120
  1210. while self.expect_begin < length - 1:
  1211. try:
  1212. if has_peek:
  1213. begin, data = data_queue.peek(timeout=peek_timeout)
  1214. if self.expect_begin == begin:
  1215. data_queue.get()
  1216. elif self.expect_begin < begin:
  1217. time.sleep(0.1)
  1218. continue
  1219. else:
  1220. logging.error('RangeFetch Error: begin(%r) < expect_begin(%r), quit.', begin, self.expect_begin)
  1221. break
  1222. else:
  1223. begin, data = data_queue.get(timeout=peek_timeout)
  1224. if self.expect_begin == begin:
  1225. pass
  1226. elif self.expect_begin < begin:
  1227. data_queue.put((begin, data))
  1228. time.sleep(0.1)
  1229. continue
  1230. else:
  1231. logging.error('RangeFetch Error: begin(%r) < expect_begin(%r), quit.', begin, self.expect_begin)
  1232. break
  1233. except Queue.Empty:
  1234. logging.error('data_queue peek timeout, break')
  1235. break
  1236. try:
  1237. self.handler.wfile.write(data)
  1238. self.expect_begin += len(data)
  1239. del data
  1240. except StandardError as e:
  1241. logging.info('RangeFetch client connection aborted(%s).', e)
  1242. break
  1243. self._stopped = True
  1244. def __fetchlet(self, range_queue, data_queue, range_delay_size):
  1245. headers = dict((k.title(), v) for k, v in self.handler.headers.items())
  1246. headers['Connection'] = 'close'
  1247. while 1:
  1248. try:
  1249. if self._stopped:
  1250. return
  1251. try:
  1252. start, end, response = range_queue.get(timeout=1)
  1253. if self.expect_begin < start and data_queue.qsize() * self.bufsize + range_delay_size > 30*1024*1024:
  1254. range_queue.put((start, end, response))
  1255. time.sleep(10)
  1256. continue
  1257. headers['Range'] = 'bytes=%d-%d' % (start, end)
  1258. fetchserver = ''
  1259. if not response:
  1260. fetchserver = random.choice(self.fetchservers)
  1261. if self._last_app_status.get(fetchserver, 200) >= 500:
  1262. time.sleep(5)
  1263. response = self.handler.create_http_request_withserver(fetchserver, self.handler.command, self.url, headers, self.handler.body, timeout=self.handler.connect_timeout, **self.kwargs)
  1264. except Queue.Empty:
  1265. continue
  1266. except StandardError as e:
  1267. logging.warning("Response %r in __fetchlet", e)
  1268. range_queue.put((start, end, None))
  1269. continue
  1270. if not response:
  1271. logging.warning('RangeFetch %s return %r', headers['Range'], response)
  1272. range_queue.put((start, end, None))
  1273. continue
  1274. if fetchserver:
  1275. self._last_app_status[fetchserver] = response.app_status
  1276. if response.app_status != 200:
  1277. logging.warning('Range Fetch "%s %s" %s return %s', self.handler.command, self.url, headers['Range'], response.app_status)
  1278. response.close()
  1279. range_queue.put((start, end, None))
  1280. continue
  1281. if response.getheader('Location'):
  1282. self.url = urlparse.urljoin(self.url, response.getheader('Location'))
  1283. logging.info('RangeFetch Redirect(%r)', self.url)
  1284. response.close()
  1285. range_queue.put((start, end, None))
  1286. continue
  1287. if 200 <= response.status < 300:
  1288. content_range = response.getheader('Content-Range')
  1289. if not content_range:
  1290. logging.warning('RangeFetch "%s %s" return Content-Range=%r: response headers=%r', self.handler.command, self.url, content_range, response.getheaders())
  1291. response.close()
  1292. range_queue.put((start, end, None))
  1293. continue
  1294. content_length = int(response.getheader('Content-Length', 0))
  1295. logging.info('>>>>>>>>>>>>>>> [thread %s] %s %s', threading.currentThread().ident, content_length, content_range)
  1296. while 1:
  1297. try:
  1298. if self._stopped:
  1299. response.close()
  1300. return
  1301. data = response.read(self.bufsize)
  1302. if not data:
  1303. break
  1304. data_queue.put((start, data))
  1305. start += len(data)
  1306. except StandardError as e:
  1307. logging.warning('RangeFetch "%s %s" %s failed: %s', self.handler.command, self.url, headers['Range'], e)
  1308. break
  1309. if start < end + 1:
  1310. logging.warning('RangeFetch "%s %s" retry %s-%s', self.handler.command, self.url, start, end)
  1311. response.close()
  1312. range_queue.put((start, end, None))
  1313. continue
  1314. logging.info('>>>>>>>>>>>>>>> Successfully reached %d bytes.', start - 1)
  1315. else:
  1316. logging.error('RangeFetch %r return %s', self.url, response.status)
  1317. response.close()
  1318. range_queue.put((start, end, None))
  1319. continue
  1320. except StandardError as e:
  1321. logging.exception('RangeFetch._fetchlet error:%s', e)
  1322. raise
  1323. class AdvancedProxyHandler(SimpleProxyHandler):
  1324. """Advanced Proxy Handler"""
  1325. dns_cache = LRUCache(64*1024)
  1326. dns_servers = []
  1327. dns_blacklist = []
  1328. tcp_connection_time = collections.defaultdict(float)
  1329. tcp_connection_time_with_clienthello = collections.defaultdict(float)
  1330. tcp_connection_cache = collections.defaultdict(Queue.PriorityQueue)
  1331. ssl_connection_time = collections.defaultdict(float)
  1332. ssl_connection_cache = collections.defaultdict(Queue.PriorityQueue)
  1333. ssl_connection_keepalive = False
  1334. max_window = 4
  1335. openssl_context = OpenSSL.SSL.Context(OpenSSL.SSL.TLSv1_METHOD)
  1336. def gethostbyname2(self, hostname):
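# Resolve hostname to an IP list: literal IPs pass through, otherwise query the configured
# dns_servers (UDP first, TCP fallback) with the blacklist applied, and cache the result.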
  1337. try:
  1338. iplist = self.dns_cache[hostname]
  1339. except KeyError:
  1340. if re.match(r'^\d+\.\d+\.\d+\.\d+$', hostname) or ':' in hostname:
  1341. iplist = [hostname]
  1342. elif self.dns_servers:
  1343. try:
  1344. record = dnslib_resolve_over_udp(hostname, self.dns_servers, timeout=2, blacklist=self.dns_blacklist)
  1345. except socket.gaierror:
  1346. record = dnslib_resolve_over_tcp(hostname, self.dns_servers, timeout=2, blacklist=self.dns_blacklist)
  1347. iplist = dnslib_record2iplist(record)
  1348. else:
  1349. iplist = socket.gethostbyname_ex(hostname)[-1]
  1350. self.dns_cache[hostname] = iplist
  1351. return iplist
  1352. def create_tcp_connection(self, hostname, port, timeout, **kwargs):
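# Connect by racing several candidate IPs in parallel: reuse a cached connection when
# possible, otherwise sort addresses by historical connect time, start one thread per
# address and return the first socket that succeeds; slower winners are cached or closed.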
  1353. client_hello = kwargs.get('client_hello', None)
  1354. cache_key = kwargs.get('cache_key') if not client_hello else None
  1355. def create_connection(ipaddr, timeout, queobj):
  1356. sock = None
  1357. try:
1358. # create an ipv4/ipv6 socket object
  1359. sock = socket.socket(socket.AF_INET if ':' not in ipaddr[0] else socket.AF_INET6)
  1360. # set reuseaddr option to avoid 10048 socket error
  1361. sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1362. # resize socket recv buffer 8K->32K to improve browser related application performance
  1363. sock.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, 32*1024)
1364. # disable Nagle's algorithm to send http request quickly.
  1365. sock.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, True)
  1366. # set a short timeout to trigger timeout retry more quickly.
  1367. sock.settimeout(min(self.connect_timeout, timeout))
  1368. # start connection time record
  1369. start_time = time.time()
  1370. # TCP connect
  1371. sock.connect(ipaddr)
  1372. # record TCP connection time
  1373. self.tcp_connection_time[ipaddr] = time.time() - start_time
  1374. # send client hello and peek server hello
  1375. if client_hello:
  1376. sock.sendall(client_hello)
  1377. if gevent and isinstance(sock, gevent.socket.socket):
  1378. sock.data = data = sock.recv(4096)
  1379. else:
  1380. data = sock.recv(4096, socket.MSG_PEEK)
  1381. if not data:
  1382. logging.debug('create_tcp_connection %r with client_hello return NULL byte, continue %r', ipaddr, time.time()-start_time)
  1383. raise socket.timeout('timed out')
  1384. # record TCP connection time with client hello
  1385. self.tcp_connection_time_with_clienthello[ipaddr] = time.time() - start_time
  1386. # set timeout
  1387. sock.settimeout(timeout)
  1388. # put tcp socket object to output queobj
  1389. queobj.put(sock)
  1390. except (socket.error, OSError) as e:
1391. # any socket.error, put Exceptions to output queobj.
  1392. queobj.put(e)
  1393. # reset a large and random timeout to the ipaddr
  1394. self.tcp_connection_time[ipaddr] = self.connect_timeout+random.random()
  1395. # close tcp socket
  1396. if sock:
  1397. sock.close()
  1398. def close_connection(count, queobj, first_tcp_time):
  1399. for _ in range(count):
  1400. sock = queobj.get()
  1401. tcp_time_threshold = min(1, 1.3 * first_tcp_time)
  1402. if sock and not isinstance(sock, Exception):
  1403. ipaddr = sock.getpeername()
  1404. if cache_key and self.tcp_connection_time[ipaddr] < tcp_time_threshold:
  1405. cache_queue = self.tcp_connection_cache[cache_key]
  1406. if cache_queue.qsize() < 8:
  1407. try:
  1408. _, old_sock = cache_queue.get_nowait()
  1409. old_sock.close()
  1410. except Queue.Empty:
  1411. pass
  1412. cache_queue.put((time.time(), sock))
  1413. else:
  1414. sock.close()
  1415. try:
  1416. while cache_key:
  1417. ctime, sock = self.tcp_connection_cache[cache_key].get_nowait()
  1418. if time.time() - ctime < 30:
  1419. return sock
  1420. else:
  1421. sock.close()
  1422. except Queue.Empty:
  1423. pass
  1424. addresses = [(x, port) for x in self.gethostbyname2(hostname)]
  1425. sock = None
  1426. for _ in range(kwargs.get('max_retry', 3)):
  1427. window = min((self.max_window+1)//2, len(addresses))
  1428. if client_hello:
  1429. addresses.sort(key=self.tcp_connection_time_with_clienthello.__getitem__)
  1430. else:
  1431. addresses.sort(key=self.tcp_connection_time.__getitem__)
  1432. addrs = addresses[:window] + random.sample(addresses, window)
  1433. queobj = gevent.queue.Queue() if gevent else Queue.Queue()
  1434. for addr in addrs:
  1435. thread.start_new_thread(create_connection, (addr, timeout, queobj))
  1436. for i in range(len(addrs)):
  1437. sock = queobj.get()
  1438. if not isinstance(sock, Exception):
  1439. first_tcp_time = self.tcp_connection_time[sock.getpeername()] if not cache_key else 0
  1440. thread.start_new_thread(close_connection, (len(addrs)-i-1, queobj, first_tcp_time))
  1441. return sock
  1442. elif i == 0:
  1443. # only output first error
1444. logging.warning('create_tcp_connection to %r with %s returned %r, retrying.', hostname, addrs, sock)
  1445. if isinstance(sock, Exception):
  1446. raise sock
  1447. def create_ssl_connection(self, hostname, port, timeout, **kwargs):
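# Same racing strategy as create_tcp_connection, but each attempt performs the SSL/TLS
# handshake (the dispatched helper uses pyOpenSSL with optional SNI) and optionally
# validates the certificate for *.appspot.com hosts before the socket is returned or cached.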
  1448. cache_key = kwargs.get('cache_key')
  1449. validate = kwargs.get('validate')
  1450. def create_connection(ipaddr, timeout, queobj):
  1451. sock = None
  1452. ssl_sock = None
  1453. try:
1454. # create an ipv4/ipv6 socket object
  1455. sock = socket.socket(socket.AF_INET if ':' not in ipaddr[0] else socket.AF_INET6)
  1456. # set reuseaddr option to avoid 10048 socket error
  1457. sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1458. # resize socket recv buffer 8K->32K to improve browser related application performance
  1459. sock.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, 32*1024)
1460. # disable Nagle's algorithm to send http request quickly.
  1461. sock.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, True)
  1462. # set a short timeout to trigger timeout retry more quickly.
  1463. sock.settimeout(min(self.connect_timeout, timeout))
  1464. # pick up the certificate
  1465. if not validate:
  1466. ssl_sock = ssl.wrap_socket(sock, ssl_version=self.ssl_version, do_handshake_on_connect=False)
  1467. else:
  1468. ssl_sock = ssl.wrap_socket(sock, ssl_version=self.ssl_version, cert_reqs=ssl.CERT_REQUIRED, ca_certs=os.path.join(os.path.dirname(os.path.abspath(__file__)), 'cacert.pem'), do_handshake_on_connect=False)
  1469. ssl_sock.settimeout(min(self.connect_timeout, timeout))
  1470. # start connection time record
  1471. start_time = time.time()
  1472. # TCP connect
  1473. ssl_sock.connect(ipaddr)
  1474. connected_time = time.time()
  1475. # SSL handshake
  1476. ssl_sock.do_handshake()
  1477. handshaked_time = time.time()
  1478. # record TCP connection time
  1479. self.tcp_connection_time[ipaddr] = ssl_sock.tcp_time = connected_time - start_time
  1480. # record SSL connection time
  1481. self.ssl_connection_time[ipaddr] = ssl_sock.ssl_time = handshaked_time - start_time
  1482. ssl_sock.ssl_time = connected_time - start_time
1483. # sometimes we want to use the raw tcp socket directly (select/epoll), so attach it to the ssl socket.
  1484. ssl_sock.sock = sock
  1485. # verify SSL certificate.
  1486. if validate and hostname.endswith('.appspot.com'):
  1487. cert = ssl_sock.getpeercert()
  1488. orgname = next((v for ((k, v),) in cert['subject'] if k == 'organizationName'))
  1489. if not orgname.lower().startswith('google '):
  1490. raise ssl.SSLError("%r certificate organizationName(%r) not startswith 'Google'" % (hostname, orgname))
  1491. # set timeout
  1492. ssl_sock.settimeout(timeout)
  1493. # put ssl socket object to output queobj
  1494. queobj.put(ssl_sock)
  1495. except (socket.error, ssl.SSLError, OSError) as e:
1496. # any socket.error, put Exceptions to output queobj.
  1497. queobj.put(e)
  1498. # reset a large and random timeout to the ipaddr
  1499. self.ssl_connection_time[ipaddr] = self.connect_timeout + random.random()
  1500. # close ssl socket
  1501. if ssl_sock:
  1502. ssl_sock.close()
  1503. # close tcp socket
  1504. if sock:
  1505. sock.close()
  1506. def create_connection_withopenssl(ipaddr, timeout, queobj):
  1507. sock = None
  1508. ssl_sock = None
  1509. try:
1510. # create an ipv4/ipv6 socket object
  1511. sock = socket.socket(socket.AF_INET if ':' not in ipaddr[0] else socket.AF_INET6)
  1512. # set reuseaddr option to avoid 10048 socket error
  1513. sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1514. # resize socket recv buffer 8K->32K to improve browser related application performance
  1515. sock.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, 32*1024)
1516. # disable Nagle's algorithm to send http request quickly.
  1517. sock.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, True)
  1518. # set a short timeout to trigger timeout retry more quickly.
  1519. sock.settimeout(timeout or self.connect_timeout)
  1520. # pick up the certificate
  1521. server_hostname = b'mail.google.com' if cache_key.startswith('google_') or hostname.endswith('.appspot.com') else None
  1522. ssl_sock = SSLConnection(self.openssl_context, sock)
  1523. ssl_sock.set_connect_state()
  1524. if server_hostname and hasattr(ssl_sock, 'set_tlsext_host_name'):
  1525. ssl_sock.set_tlsext_host_name(server_hostname)
  1526. # start connection time record
  1527. start_time = time.time()
  1528. # TCP connect
  1529. ssl_sock.connect(ipaddr)
  1530. connected_time = time.time()
  1531. # SSL handshake
  1532. ssl_sock.do_handshake()
  1533. handshaked_time = time.time()
  1534. # record TCP connection time
  1535. self.tcp_connection_time[ipaddr] = ssl_sock.tcp_time = connected_time - start_time
  1536. # record SSL connection time
  1537. self.ssl_connection_time[ipaddr] = ssl_sock.ssl_time = handshaked_time - start_time
1538. # sometimes we want to use the raw tcp socket directly (select/epoll), so attach it to the ssl socket.
  1539. ssl_sock.sock = sock
  1540. # verify SSL certificate.
  1541. if validate and hostname.endswith('.appspot.com'):
  1542. cert = ssl_sock.get_peer_certificate()
  1543. commonname = next((v for k, v in cert.get_subject().get_components() if k == 'CN'))
  1544. if '.google' not in commonname and not commonname.endswith('.appspot.com'):
  1545. raise socket.error("Host name '%s' doesn't match certificate host '%s'" % (hostname, commonname))
  1546. # put ssl socket object to output queobj
  1547. queobj.put(ssl_sock)
  1548. except (socket.error, OpenSSL.SSL.Error, OSError) as e:
1549. # any socket.error, put Exceptions to output queobj.
  1550. queobj.put(e)
  1551. # reset a large and random timeout to the ipaddr
  1552. self.ssl_connection_time[ipaddr] = self.connect_timeout + random.random()
  1553. # close ssl socket
  1554. if ssl_sock:
  1555. ssl_sock.close()
  1556. # close tcp socket
  1557. if sock:
  1558. sock.close()
  1559. def close_connection(count, queobj, first_tcp_time, first_ssl_time):
  1560. for _ in range(count):
  1561. sock = queobj.get()
  1562. ssl_time_threshold = min(1, 1.3 * first_ssl_time)
  1563. if sock and not isinstance(sock, Exception):
  1564. if cache_key and sock.ssl_time < ssl_time_threshold:
  1565. cache_queue = self.ssl_connection_cache[cache_key]
  1566. if cache_queue.qsize() < 8:
  1567. try:
  1568. _, old_sock = cache_queue.get_nowait()
  1569. old_sock.close()
  1570. except Queue.Empty:
  1571. pass
  1572. cache_queue.put((time.time(), sock))
  1573. else:
  1574. sock.close()
  1575. try:
  1576. while cache_key:
  1577. ctime, sock = self.ssl_connection_cache[cache_key].get_nowait()
  1578. if time.time() - ctime < 30:
  1579. return sock
  1580. else:
  1581. sock.close()
  1582. except Queue.Empty:
  1583. pass
  1584. addresses = [(x, port) for x in self.gethostbyname2(hostname)]
  1585. sock = None
  1586. for _ in range(kwargs.get('max_retry', 3)):
  1587. window = min((self.max_window+1)//2, len(addresses))
  1588. addresses.sort(key=self.ssl_connection_time.__getitem__)
  1589. addrs = addresses[:window] + random.sample(addresses, window)
  1590. queobj = gevent.queue.Queue() if gevent else Queue.Queue()
  1591. for addr in addrs:
  1592. thread.start_new_thread(create_connection_withopenssl, (addr, timeout, queobj))
  1593. for i in range(len(addrs)):
  1594. sock = queobj.get()
  1595. if not isinstance(sock, Exception):
  1596. thread.start_new_thread(close_connection, (len(addrs)-i-1, queobj, sock.tcp_time, sock.ssl_time))
  1597. return sock
  1598. elif i == 0:
  1599. # only output first error
1600. logging.warning('create_ssl_connection to %r with %s returned %r, retrying.', hostname, addrs, sock)
  1601. if isinstance(sock, Exception):
  1602. raise sock
  1603. def create_http_request(self, method, url, headers, body, timeout, max_retry=2, bufsize=8192, crlf=None, validate=None, cache_key=None):
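# Build and send a raw HTTP request over a (possibly cached) TCP/SSL connection. When
# 'crlf' is set for plain HTTP, a burst of fake GET requests is prepended to obfuscate the
# real request; their responses are read and discarded before the real response is parsed.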
  1604. scheme, netloc, path, query, _ = urlparse.urlsplit(url)
  1605. if netloc.rfind(':') <= netloc.rfind(']'):
  1606. # no port number
  1607. host = netloc
  1608. port = 443 if scheme == 'https' else 80
  1609. else:
  1610. host, _, port = netloc.rpartition(':')
  1611. port = int(port)
  1612. if query:
  1613. path += '?' + query
  1614. if 'Host' not in headers:
  1615. headers['Host'] = host
  1616. if body and 'Content-Length' not in headers:
  1617. headers['Content-Length'] = str(len(body))
  1618. sock = None
  1619. for i in range(max_retry):
  1620. try:
  1621. create_connection = self.create_ssl_connection if scheme == 'https' else self.create_tcp_connection
  1622. sock = create_connection(host, port, timeout, validate=validate, cache_key=cache_key)
  1623. break
  1624. except StandardError as e:
  1625. logging.exception('create_http_request "%s %s" failed:%s', method, url, e)
  1626. if sock:
  1627. sock.close()
  1628. if i == max_retry - 1:
  1629. raise
  1630. request_data = ''
  1631. crlf_counter = 0
  1632. if scheme != 'https' and crlf:
  1633. fakeheaders = dict((k.title(), v) for k, v in headers.items())
  1634. fakeheaders.pop('Content-Length', None)
  1635. fakeheaders.pop('Cookie', None)
  1636. fakeheaders.pop('Host', None)
  1637. if 'User-Agent' not in fakeheaders:
  1638. fakeheaders['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1878.0 Safari/537.36'
  1639. if 'Accept-Language' not in fakeheaders:
  1640. fakeheaders['Accept-Language'] = 'zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.4'
  1641. if 'Accept' not in fakeheaders:
  1642. fakeheaders['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
  1643. fakeheaders_data = ''.join('%s: %s\r\n' % (k, v) for k, v in fakeheaders.items() if k not in self.skip_headers)
  1644. while crlf_counter < 5 or len(request_data) < 1500 * 2:
  1645. request_data += 'GET / HTTP/1.1\r\n%s\r\n' % fakeheaders_data
  1646. crlf_counter += 1
  1647. request_data += '\r\n\r\n\r\n'
  1648. request_data += '%s %s %s\r\n' % (method, path, self.protocol_version)
  1649. request_data += ''.join('%s: %s\r\n' % (k.title(), v) for k, v in headers.items() if k.title() not in self.skip_headers)
  1650. request_data += '\r\n'
  1651. if isinstance(body, bytes):
  1652. sock.sendall(request_data.encode() + body)
  1653. elif hasattr(body, 'read'):
  1654. sock.sendall(request_data)
  1655. while 1:
  1656. data = body.read(bufsize)
  1657. if not data:
  1658. break
  1659. sock.sendall(data)
  1660. else:
  1661. raise TypeError('create_http_request(body) must be a string or buffer, not %r' % type(body))
  1662. response = None
  1663. try:
  1664. while crlf_counter:
  1665. if sys.version[:3] == '2.7':
  1666. response = httplib.HTTPResponse(sock, buffering=False)
  1667. else:
  1668. response = httplib.HTTPResponse(sock)
  1669. response.fp.close()
  1670. response.fp = sock.makefile('rb', 0)
  1671. response.begin()
  1672. response.read()
  1673. response.close()
  1674. crlf_counter -= 1
  1675. except StandardError as e:
  1676. logging.exception('crlf skip read host=%r path=%r error: %r', headers.get('Host'), path, e)
  1677. if response:
  1678. if response.fp and response.fp._sock:
  1679. response.fp._sock.close()
  1680. response.close()
  1681. if sock:
  1682. sock.close()
  1683. return None
  1684. if sys.version[:3] == '2.7':
  1685. response = httplib.HTTPResponse(sock, buffering=True)
  1686. else:
  1687. response = httplib.HTTPResponse(sock)
  1688. response.fp.close()
  1689. response.fp = sock.makefile('rb')
  1690. response.begin()
  1691. if self.ssl_connection_keepalive and scheme == 'https' and cache_key:
  1692. response.cache_key = cache_key
  1693. response.cache_sock = response.fp._sock
  1694. return response
  1695. def handle_urlfetch_response_close(self, fetchserver, response):
  1696. cache_sock = getattr(response, 'cache_sock', None)
  1697. if cache_sock:
  1698. if self.scheme == 'https':
  1699. self.ssl_connection_cache[response.cache_key].put((time.time(), cache_sock))
  1700. else:
  1701. cache_sock.close()
  1702. del response.cache_sock
  1703. def handle_urlfetch_error(self, fetchserver, response):
  1704. pass
  1705. class Common(object):
  1706. """Global Config Object"""
  1707. ENV_CONFIG_PREFIX = 'GOAGENT_'
  1708. def __init__(self):
  1709. """load config from proxy.ini"""
  1710. ConfigParser.RawConfigParser.OPTCRE = re.compile(r'(?P<option>[^=\s][^=]*)\s*(?P<vi>[=])\s*(?P<value>.*)$')
  1711. self.CONFIG = ConfigParser.ConfigParser()
  1712. self.CONFIG_FILENAME = os.path.splitext(os.path.abspath(__file__))[0]+'.ini'
  1713. self.CONFIG_USER_FILENAME = re.sub(r'\.ini$', '.user.ini', self.CONFIG_FILENAME)
  1714. self.CONFIG.read([self.CONFIG_FILENAME, self.CONFIG_USER_FILENAME])
  1715. for key, value in os.environ.items():
  1716. m = re.match(r'^%s([A-Z]+)_([A-Z\_\-]+)$' % self.ENV_CONFIG_PREFIX, key)
  1717. if m:
  1718. self.CONFIG.set(m.group(1).lower(), m.group(2).lower(), value)
  1719. self.LISTEN_IP = self.CONFIG.get('listen', 'ip')
  1720. self.LISTEN_PORT = self.CONFIG.getint('listen', 'port')
  1721. self.LISTEN_USERNAME = self.CONFIG.get('listen', 'username') if self.CONFIG.has_option('listen', 'username') else ''
  1722. self.LISTEN_PASSWORD = self.CONFIG.get('listen', 'password') if self.CONFIG.has_option('listen', 'password') else ''
  1723. self.LISTEN_VISIBLE = self.CONFIG.getint('listen', 'visible')
  1724. self.LISTEN_DEBUGINFO = self.CONFIG.getint('listen', 'debuginfo')
  1725. self.GAE_APPIDS = re.findall(r'[\w\-\.]+', self.CONFIG.get('gae', 'appid').replace('.appspot.com', ''))
  1726. self.GAE_PASSWORD = self.CONFIG.get('gae', 'password').strip()
  1727. self.GAE_PATH = self.CONFIG.get('gae', 'path')
  1728. self.GAE_MODE = self.CONFIG.get('gae', 'mode')
  1729. self.GAE_PROFILE = self.CONFIG.get('gae', 'profile').strip()
  1730. self.GAE_WINDOW = self.CONFIG.getint('gae', 'window')
  1731. self.GAE_KEEPALIVE = self.CONFIG.getint('gae', 'keepalive') if self.CONFIG.has_option('gae', 'keepalive') else 0
  1732. self.GAE_OBFUSCATE = self.CONFIG.getint('gae', 'obfuscate')
  1733. self.GAE_VALIDATE = self.CONFIG.getint('gae', 'validate')
  1734. self.GAE_TRANSPORT = self.CONFIG.getint('gae', 'transport') if self.CONFIG.has_option('gae', 'transport') else 0
  1735. self.GAE_OPTIONS = self.CONFIG.get('gae', 'options')
  1736. self.GAE_REGIONS = set(x.upper() for x in self.CONFIG.get('gae', 'regions').split('|') if x.strip())
  1737. self.GAE_SSLVERSION = self.CONFIG.get('gae', 'sslversion')
  1738. self.GAE_PAGESPEED = self.CONFIG.getint('gae', 'pagespeed') if self.CONFIG.has_option('gae', 'pagespeed') else 0
  1739. if self.GAE_PROFILE == 'auto':
  1740. try:
  1741. socket.create_connection(('2001:4860:4860::8888', 53), timeout=1).close()
  1742. logging.info('Use profile ipv6')
  1743. self.GAE_PROFILE = 'ipv6'
  1744. except socket.error as e:
1745. logging.info('Failed to try profile ipv6 %r, falling back to ipv4', e)
  1746. self.GAE_PROFILE = 'ipv4'
  1747. hosts_section, http_section = '%s/hosts' % self.GAE_PROFILE, '%s/http' % self.GAE_PROFILE
  1748. if 'USERDNSDOMAIN' in os.environ and re.match(r'^\w+\.\w+$', os.environ['USERDNSDOMAIN']):
  1749. self.CONFIG.set(hosts_section, '.' + os.environ['USERDNSDOMAIN'], '')
  1750. self.HOST_MAP = collections.OrderedDict((k, v or k) for k, v in self.CONFIG.items(hosts_section) if '\\' not in k and ':' not in k and not k.startswith('.'))
  1751. self.HOST_POSTFIX_MAP = collections.OrderedDict((k, v) for k, v in self.CONFIG.items(hosts_section) if '\\' not in k and ':' not in k and k.startswith('.'))
  1752. self.HOST_POSTFIX_ENDSWITH = tuple(self.HOST_POSTFIX_MAP)
  1753. self.HOSTPORT_MAP = collections.OrderedDict((k, v) for k, v in self.CONFIG.items(hosts_section) if ':' in k and not k.startswith('.'))
  1754. self.HOSTPORT_POSTFIX_MAP = collections.OrderedDict((k, v) for k, v in self.CONFIG.items(hosts_section) if ':' in k and k.startswith('.'))
  1755. self.HOSTPORT_POSTFIX_ENDSWITH = tuple(self.HOSTPORT_POSTFIX_MAP)
  1756. self.URLRE_MAP = collections.OrderedDict((re.compile(k).match, v) for k, v in self.CONFIG.items(hosts_section) if '\\' in k)
  1757. self.HTTP_WITHGAE = set(self.CONFIG.get(http_section, 'withgae').split('|'))
  1758. self.HTTP_CRLFSITES = tuple(self.CONFIG.get(http_section, 'crlfsites').split('|'))
  1759. self.HTTP_FORCEHTTPS = tuple(self.CONFIG.get(http_section, 'forcehttps').split('|')) if self.CONFIG.get(http_section, 'forcehttps').strip() else tuple()
  1760. self.HTTP_NOFORCEHTTPS = set(self.CONFIG.get(http_section, 'noforcehttps').split('|')) if self.CONFIG.get(http_section, 'noforcehttps').strip() else set()
  1761. self.HTTP_FAKEHTTPS = tuple(self.CONFIG.get(http_section, 'fakehttps').split('|')) if self.CONFIG.get(http_section, 'fakehttps').strip() else tuple()
  1762. self.HTTP_NOFAKEHTTPS = set(self.CONFIG.get(http_section, 'nofakehttps').split('|')) if self.CONFIG.get(http_section, 'nofakehttps').strip() else set()
  1763. self.HTTP_DNS = self.CONFIG.get(http_section, 'dns').split('|') if self.CONFIG.has_option(http_section, 'dns') else []
  1764. self.IPLIST_MAP = collections.OrderedDict((k, v.split('|')) for k, v in self.CONFIG.items('iplist'))
  1765. self.IPLIST_MAP.update((k, [k]) for k, v in self.HOST_MAP.items() if k == v)
  1766. self.PAC_ENABLE = self.CONFIG.getint('pac', 'enable')
  1767. self.PAC_IP = self.CONFIG.get('pac', 'ip')
  1768. self.PAC_PORT = self.CONFIG.getint('pac', 'port')
  1769. self.PAC_FILE = self.CONFIG.get('pac', 'file').lstrip('/')
  1770. self.PAC_GFWLIST = self.CONFIG.get('pac', 'gfwlist')
  1771. self.PAC_ADBLOCK = self.CONFIG.get('pac', 'adblock')
  1772. self.PAC_ADMODE = self.CONFIG.getint('pac', 'admode')
  1773. self.PAC_EXPIRED = self.CONFIG.getint('pac', 'expired')
  1774. self.PHP_ENABLE = self.CONFIG.getint('php', 'enable')
  1775. self.PHP_LISTEN = self.CONFIG.get('php', 'listen')
  1776. self.PHP_PASSWORD = self.CONFIG.get('php', 'password') if self.CONFIG.has_option('php', 'password') else ''
  1777. self.PHP_CRLF = self.CONFIG.getint('php', 'crlf') if self.CONFIG.has_option('php', 'crlf') else 1
  1778. self.PHP_VALIDATE = self.CONFIG.getint('php', 'validate') if self.CONFIG.has_option('php', 'validate') else 0
  1779. self.PHP_FETCHSERVER = self.CONFIG.get('php', 'fetchserver')
  1780. self.PHP_USEHOSTS = self.CONFIG.getint('php', 'usehosts')
  1781. self.PROXY_ENABLE = self.CONFIG.getint('proxy', 'enable')
  1782. self.PROXY_AUTODETECT = self.CONFIG.getint('proxy', 'autodetect') if self.CONFIG.has_option('proxy', 'autodetect') else 0
  1783. self.PROXY_HOST = self.CONFIG.get('proxy', 'host')
  1784. self.PROXY_PORT = self.CONFIG.getint('proxy', 'port')
  1785. self.PROXY_USERNAME = self.CONFIG.get('proxy', 'username')
  1786. self.PROXY_PASSWROD = self.CONFIG.get('proxy', 'password')
  1787. if not self.PROXY_ENABLE and self.PROXY_AUTODETECT:
  1788. system_proxy = ProxyUtil.get_system_proxy()
  1789. if system_proxy and self.LISTEN_IP not in system_proxy:
  1790. _, username, password, address = ProxyUtil.parse_proxy(system_proxy)
  1791. proxyhost, _, proxyport = address.rpartition(':')
  1792. self.PROXY_ENABLE = 1
  1793. self.PROXY_USERNAME = username
  1794. self.PROXY_PASSWROD = password
  1795. self.PROXY_HOST = proxyhost
  1796. self.PROXY_PORT = int(proxyport)
  1797. if self.PROXY_ENABLE:
  1798. self.GAE_MODE = 'https'
  1799. self.AUTORANGE_HOSTS = self.CONFIG.get('autorange', 'hosts').split('|')
  1800. self.AUTORANGE_HOSTS_MATCH = [re.compile(fnmatch.translate(h)).match for h in self.AUTORANGE_HOSTS]
  1801. self.AUTORANGE_ENDSWITH = tuple(self.CONFIG.get('autorange', 'endswith').split('|'))
  1802. self.AUTORANGE_NOENDSWITH = tuple(self.CONFIG.get('autorange', 'noendswith').split('|'))
  1803. self.AUTORANGE_MAXSIZE = self.CONFIG.getint('autorange', 'maxsize')
  1804. self.AUTORANGE_WAITSIZE = self.CONFIG.getint('autorange', 'waitsize')
  1805. self.AUTORANGE_BUFSIZE = self.CONFIG.getint('autorange', 'bufsize')
  1806. self.AUTORANGE_THREADS = self.CONFIG.getint('autorange', 'threads')
  1807. self.FETCHMAX_LOCAL = self.CONFIG.getint('fetchmax', 'local') if self.CONFIG.get('fetchmax', 'local') else 3
  1808. self.FETCHMAX_SERVER = self.CONFIG.get('fetchmax', 'server')
  1809. self.DNS_ENABLE = self.CONFIG.getint('dns', 'enable')
  1810. self.DNS_LISTEN = self.CONFIG.get('dns', 'listen')
  1811. self.DNS_SERVERS = self.HTTP_DNS or self.CONFIG.get('dns', 'servers').split('|')
  1812. self.DNS_BLACKLIST = set(self.CONFIG.get('dns', 'blacklist').split('|'))
  1813. self.DNS_TCPOVER = tuple(self.CONFIG.get('dns', 'tcpover').split('|')) if self.CONFIG.get('dns', 'tcpover').strip() else tuple()
  1814. self.USERAGENT_ENABLE = self.CONFIG.getint('useragent', 'enable')
  1815. self.USERAGENT_STRING = self.CONFIG.get('useragent', 'string')
  1816. self.LOVE_ENABLE = self.CONFIG.getint('love', 'enable')
  1817. self.LOVE_TIP = self.CONFIG.get('love', 'tip').encode('utf8').decode('unicode-escape').split('|')
  1818. def resolve_iplist(self):
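# Resolve every symbolic name in IPLIST_MAP: query each DNS server (and the local
# resolver) in parallel threads, merge the answers, drop blacklisted Google addresses
# and, for google_* groups, keep only addresses sharing the first result's prefix.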
  1819. def do_resolve(host, dnsservers, queue):
  1820. iplist = []
  1821. for dnslib_resolve in (dnslib_resolve_over_udp, dnslib_resolve_over_tcp):
  1822. try:
  1823. if "<local>" in dnsservers:
  1824. iplist += socket.gethostbyname_ex(host)[-1]
  1825. else:
  1826. iplist += dnslib_record2iplist(dnslib_resolve_over_udp(host, dnsservers, timeout=4, blacklist=self.DNS_BLACKLIST))
  1827. except (socket.error, OSError) as e:
  1828. logging.warning('%r remote host=%r failed: %s', dnslib_resolve, host, e)
  1829. queue.put((host, dnsservers, iplist))
  1830. # https://support.google.com/websearch/answer/186669?hl=zh-Hans
  1831. google_blacklist = ['216.239.32.20'] + list(self.DNS_BLACKLIST)
  1832. for name, need_resolve_hosts in list(self.IPLIST_MAP.items()):
  1833. if all(re.match(r'\d+\.\d+\.\d+\.\d+', x) or ':' in x for x in need_resolve_hosts):
  1834. continue
  1835. need_resolve_remote = [x for x in need_resolve_hosts if ':' not in x and not re.match(r'\d+\.\d+\.\d+\.\d+', x)]
  1836. resolved_iplist = [x for x in need_resolve_hosts if x not in need_resolve_remote]
  1837. result_queue = Queue.Queue()
  1838. for host in need_resolve_remote:
  1839. for dnsserver in self.DNS_SERVERS:
  1840. logging.debug('resolve remote host=%r from dnsserver=%r', host, dnsserver)
  1841. thread.start_new_thread(do_resolve, (host, [dnsserver], result_queue))
  1842. thread.start_new_thread(do_resolve, (host, ["<local>"], result_queue))
  1843. for _ in xrange(len(self.DNS_SERVERS) * len(need_resolve_remote) * 2):
  1844. try:
  1845. host, dnsservers, iplist = result_queue.get(timeout=10)
  1846. resolved_iplist += iplist or []
  1847. logging.debug('resolve remote host=%r from dnsservers=%s return iplist=%s', host, dnsservers, iplist)
  1848. except Queue.Empty:
1849. logging.warn('resolve remote timed out, trying local resolver')
  1850. resolved_iplist += sum([socket.gethostbyname_ex(x)[-1] for x in need_resolve_remote], [])
  1851. break
  1852. if name.startswith('google_') and name not in ('google_cn', 'google_hk') and resolved_iplist:
  1853. iplist_prefix = re.split(r'[\.:]', resolved_iplist[0])[0]
  1854. resolved_iplist = list(set(x for x in resolved_iplist if x.startswith(iplist_prefix)))
  1855. else:
  1856. resolved_iplist = list(set(resolved_iplist))
  1857. if name.startswith('google_'):
  1858. resolved_iplist = list(set(resolved_iplist) - set(google_blacklist))
  1859. if len(resolved_iplist) == 0:
1860. logging.error('resolve %s host returned empty iplist! please retry!', name)
  1861. sys.exit(-1)
  1862. logging.info('resolve name=%s host to iplist=%r', name, resolved_iplist)
  1863. common.IPLIST_MAP[name] = resolved_iplist
  1864. def info(self):
  1865. info = ''
  1866. info += '------------------------------------------------------\n'
  1867. info += 'GoAgent Version : %s (python/%s %spyopenssl/%s)\n' % (__version__, sys.version[:5], gevent and 'gevent/%s ' % gevent.__version__ or '', getattr(OpenSSL, '__version__', 'Disabled'))
  1868. info += 'Uvent Version : %s (pyuv/%s libuv/%s)\n' % (__import__('uvent').__version__, __import__('pyuv').__version__, __import__('pyuv').LIBUV_VERSION) if all(x in sys.modules for x in ('pyuv', 'uvent')) else ''
  1869. info += 'Listen Address : %s:%d\n' % (self.LISTEN_IP, self.LISTEN_PORT)
  1870. info += 'Local Proxy : %s:%s\n' % (self.PROXY_HOST, self.PROXY_PORT) if self.PROXY_ENABLE else ''
  1871. info += 'Debug INFO : %s\n' % self.LISTEN_DEBUGINFO if self.LISTEN_DEBUGINFO else ''
  1872. info += 'GAE Mode : %s\n' % self.GAE_MODE
  1873. info += 'GAE Profile : %s\n' % self.GAE_PROFILE if self.GAE_PROFILE else ''
  1874. info += 'GAE APPID : %s\n' % '|'.join(self.GAE_APPIDS)
  1875. info += 'GAE Validate : %s\n' % self.GAE_VALIDATE if self.GAE_VALIDATE else ''
  1876. info += 'GAE Obfuscate : %s\n' % self.GAE_OBFUSCATE if self.GAE_OBFUSCATE else ''
  1877. if common.PAC_ENABLE:
  1878. info += 'Pac Server : http://%s:%d/%s\n' % (self.PAC_IP if self.PAC_IP and self.PAC_IP != '0.0.0.0' else ProxyUtil.get_listen_ip(), self.PAC_PORT, self.PAC_FILE)
  1879. info += 'Pac File : file://%s\n' % os.path.abspath(self.PAC_FILE)
  1880. if common.PHP_ENABLE:
  1881. info += 'PHP Listen : %s\n' % common.PHP_LISTEN
  1882. info += 'PHP FetchServer : %s\n' % common.PHP_FETCHSERVER
  1883. if common.DNS_ENABLE:
  1884. info += 'DNS Listen : %s\n' % common.DNS_LISTEN
  1885. info += 'DNS Servers : %s\n' % '|'.join(common.DNS_SERVERS)
  1886. info += '------------------------------------------------------\n'
  1887. return info
  1888. common = Common()
  1889. def message_html(title, banner, detail=''):
  1890. MESSAGE_TEMPLATE = '''
  1891. <html><head>
  1892. <meta http-equiv="content-type" content="text/html;charset=utf-8">
  1893. <title>$title</title>
  1894. <style><!--
  1895. body {font-family: arial,sans-serif}
  1896. div.nav {margin-top: 1ex}
  1897. div.nav A {font-size: 10pt; font-family: arial,sans-serif}
  1898. span.nav {font-size: 10pt; font-family: arial,sans-serif; font-weight: bold}
  1899. div.nav A,span.big {font-size: 12pt; color: #0000cc}
  1900. div.nav A {font-size: 10pt; color: black}
  1901. A.l:link {color: #6f6f6f}
  1902. A.u:link {color: green}
  1903. //--></style>
  1904. </head>
  1905. <body text=#000000 bgcolor=#ffffff>
  1906. <table border=0 cellpadding=2 cellspacing=0 width=100%>
  1907. <tr><td bgcolor=#3366cc><font face=arial,sans-serif color=#ffffff><b>Message From LocalProxy</b></td></tr>
  1908. <tr><td> </td></tr></table>
  1909. <blockquote>
  1910. <H1>$banner</H1>
  1911. $detail
  1912. <p>
  1913. </blockquote>
  1914. <table width=100% cellpadding=0 cellspacing=0><tr><td bgcolor=#3366cc><img alt="" width=1 height=4></td></tr></table>
  1915. </body></html>
  1916. '''
  1917. return string.Template(MESSAGE_TEMPLATE).substitute(title=title, banner=banner, detail=detail)
  1918. try:
  1919. from Crypto.Cipher.ARC4 import new as RC4Cipher
  1920. except ImportError:
1921. logging.warn('Failed to load Crypto.Cipher.ARC4, using pure Python implementation instead.')
  1922. class RC4Cipher(object):
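# Pure-Python RC4 fallback: __init__ runs the key-scheduling algorithm, encrypt() generates
# the keystream and XORs it with the data (encryption and decryption are identical).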
  1923. def __init__(self, key):
  1924. x = 0
  1925. box = range(256)
  1926. for i, y in enumerate(box):
  1927. x = (x + y + ord(key[i % len(key)])) & 0xff
  1928. box[i], box[x] = box[x], y
  1929. self.__box = box
  1930. self.__x = 0
  1931. self.__y = 0
  1932. def encrypt(self, data):
  1933. out = []
  1934. out_append = out.append
  1935. x = self.__x
  1936. y = self.__y
  1937. box = self.__box
  1938. for char in data:
  1939. x = (x + 1) & 0xff
  1940. y = (y + box[x]) & 0xff
  1941. box[x], box[y] = box[y], box[x]
  1942. out_append(chr(ord(char) ^ box[(box[x] + box[y]) & 0xff]))
  1943. self.__x = x
  1944. self.__y = y
  1945. return ''.join(out)
  1946. class XORCipher(object):
  1947. """XOR Cipher Class"""
  1948. def __init__(self, key):
  1949. self.__key_gen = itertools.cycle([ord(x) for x in key]).next
  1950. self.__key_xor = lambda s: ''.join(chr(ord(x) ^ self.__key_gen()) for x in s)
  1951. if len(key) == 1:
  1952. try:
  1953. from Crypto.Util.strxor import strxor_c
  1954. c = ord(key)
  1955. self.__key_xor = lambda s: strxor_c(s, c)
  1956. except ImportError:
1957. sys.stderr.write('Failed to load Crypto.Util.strxor, using pure Python implementation instead.\n')
  1958. def encrypt(self, data):
  1959. return self.__key_xor(data)
  1960. class CipherFileObject(object):
  1961. """fileobj wrapper for cipher"""
  1962. def __init__(self, fileobj, cipher):
  1963. self.__fileobj = fileobj
  1964. self.__cipher = cipher
  1965. def __getattr__(self, attr):
  1966. if attr not in ('__fileobj', '__cipher'):
  1967. return getattr(self.__fileobj, attr)
  1968. def read(self, size=-1):
  1969. return self.__cipher.encrypt(self.__fileobj.read(size))
  1970. class LocalProxyServer(SocketServer.ThreadingTCPServer):
  1971. """Local Proxy Server"""
  1972. allow_reuse_address = True
  1973. daemon_threads = True
  1974. def close_request(self, request):
  1975. try:
  1976. request.close()
  1977. except StandardError:
  1978. pass
  1979. def finish_request(self, request, client_address):
  1980. try:
  1981. self.RequestHandlerClass(request, client_address, self)
  1982. except NetWorkIOError as e:
  1983. if e[0] not in (errno.ECONNABORTED, errno.ECONNRESET, errno.EPIPE):
  1984. raise
  1985. def handle_error(self, *args):
  1986. """make ThreadingTCPServer happy"""
  1987. exc_info = sys.exc_info()
  1988. error = exc_info and len(exc_info) and exc_info[1]
  1989. if isinstance(error, NetWorkIOError) and len(error.args) > 1 and 'bad write retry' in error.args[1]:
  1990. exc_info = error = None
  1991. else:
  1992. del exc_info, error
  1993. SocketServer.ThreadingTCPServer.handle_error(self, *args)
  1994. class UserAgentFilter(BaseProxyHandlerFilter):
  1995. """user agent filter"""
  1996. def filter(self, handler):
  1997. if common.USERAGENT_ENABLE:
  1998. handler.headers['User-Agent'] = common.USERAGENT_STRING
  1999. class WithGAEFilter(BaseProxyHandlerFilter):
  2000. """with gae filter"""
  2001. def filter(self, handler):
  2002. if handler.host in common.HTTP_WITHGAE:
2003. logging.debug('WithGAEFilter matched %r %r', handler.path, handler.headers)
  2004. # assume the last one handler is GAEFetchFilter
  2005. return handler.handler_filters[-1].filter(handler)
  2006. class ForceHttpsFilter(BaseProxyHandlerFilter):
  2007. """force https filter"""
  2008. def filter(self, handler):
  2009. if handler.command != 'CONNECT' and handler.host.endswith(common.HTTP_FORCEHTTPS) and handler.host not in common.HTTP_NOFORCEHTTPS:
  2010. if not handler.headers.get('Referer', '').startswith('https://') and not handler.path.startswith('https://'):
2011. logging.debug('ForceHttpsFilter matched %r %r', handler.path, handler.headers)
  2012. headers = {'Location': handler.path.replace('http://', 'https://', 1), 'Connection': 'close'}
  2013. return [handler.MOCK, 301, headers, '']
  2014. class FakeHttpsFilter(BaseProxyHandlerFilter):
  2015. """fake https filter"""
  2016. def filter(self, handler):
  2017. if handler.command == 'CONNECT' and handler.host.endswith(common.HTTP_FAKEHTTPS) and handler.host not in common.HTTP_NOFAKEHTTPS:
2018. logging.debug('FakeHttpsFilter matched %r %r', handler.path, handler.headers)
  2019. return [handler.STRIP, True, None]
  2020. class HostsFilter(BaseProxyHandlerFilter):
  2021. """force https filter"""
  2022. def filter_localfile(self, handler, filename):
  2023. content_type = None
  2024. try:
  2025. import mimetypes
  2026. content_type = mimetypes.types_map.get(os.path.splitext(filename)[1])
  2027. except StandardError as e:
  2028. logging.error('import mimetypes failed: %r', e)
  2029. try:
  2030. with open(filename, 'rb') as fp:
  2031. data = fp.read()
  2032. headers = {'Connection': 'close', 'Content-Length': str(len(data))}
  2033. if content_type:
  2034. headers['Content-Type'] = content_type
  2035. return [handler.MOCK, 200, headers, data]
  2036. except StandardError as e:
  2037. return [handler.MOCK, 403, {'Connection': 'close'}, 'read %r %r' % (filename, e)]
  2038. def filter(self, handler):
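# Map the request to a hostname via, in order: exact host, host postfix, host:port,
# host:port postfix and URL regex rules, then decide the action: FORWARD for CONNECT,
# DIRECT (optionally with CRLF obfuscation) otherwise, or serve a local file:// path.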
  2039. host, port = handler.host, handler.port
  2040. hostport = handler.path if handler.command == 'CONNECT' else '%s:%d' % (host, port)
  2041. hostname = ''
  2042. if host in common.HOST_MAP:
  2043. hostname = common.HOST_MAP[host] or host
  2044. elif host.endswith(common.HOST_POSTFIX_ENDSWITH):
  2045. hostname = next(common.HOST_POSTFIX_MAP[x] for x in common.HOST_POSTFIX_MAP if host.endswith(x)) or host
  2046. common.HOST_MAP[host] = hostname
  2047. if hostport in common.HOSTPORT_MAP:
  2048. hostname = common.HOSTPORT_MAP[hostport] or host
  2049. elif hostport.endswith(common.HOSTPORT_POSTFIX_ENDSWITH):
  2050. hostname = next(common.HOSTPORT_POSTFIX_MAP[x] for x in common.HOSTPORT_POSTFIX_MAP if hostport.endswith(x)) or host
  2051. common.HOSTPORT_MAP[hostport] = hostname
  2052. if handler.command != 'CONNECT' and common.URLRE_MAP:
  2053. try:
  2054. hostname = next(common.URLRE_MAP[x] for x in common.URLRE_MAP if x(handler.path)) or host
  2055. except StopIteration:
  2056. pass
  2057. if not hostname:
  2058. return None
  2059. elif hostname in common.IPLIST_MAP:
  2060. handler.dns_cache[host] = common.IPLIST_MAP[hostname]
  2061. elif hostname == host and host.endswith(common.DNS_TCPOVER) and host not in handler.dns_cache:
  2062. try:
  2063. iplist = dnslib_record2iplist(dnslib_resolve_over_tcp(host, handler.dns_servers, timeout=4, blacklist=handler.dns_blacklist))
  2064. logging.info('HostsFilter dnslib_resolve_over_tcp %r with %r return %s', host, handler.dns_servers, iplist)
  2065. handler.dns_cache[host] = iplist
  2066. except socket.error as e:
  2067. logging.warning('HostsFilter dnslib_resolve_over_tcp %r with %r failed: %r', host, handler.dns_servers, e)
  2068. elif re.match(r'^\d+\.\d+\.\d+\.\d+$', hostname) or ':' in hostname:
  2069. handler.dns_cache[host] = [hostname]
  2070. elif hostname.startswith('file://'):
  2071. filename = hostname.lstrip('file://')
  2072. if os.name == 'nt':
  2073. filename = filename.lstrip('/')
  2074. return self.filter_localfile(handler, filename)
  2075. cache_key = '%s:%s' % (hostname, port)
  2076. if handler.command == 'CONNECT':
  2077. return [handler.FORWARD, host, port, handler.connect_timeout, {'cache_key': cache_key}]
  2078. else:
  2079. if host.endswith(common.HTTP_CRLFSITES):
  2080. handler.close_connection = True
  2081. return [handler.DIRECT, {'crlf': True}]
  2082. else:
  2083. return [handler.DIRECT, {'cache_key': cache_key}]
  2084. class DirectRegionFilter(BaseProxyHandlerFilter):
  2085. """direct region filter"""
  2086. geoip = pygeoip.GeoIP(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'GeoIP.dat')) if pygeoip and common.GAE_REGIONS else None
  2087. region_cache = LRUCache(16*1024)
  2088. def get_country_code(self, hostname, dnsservers):
  2089. """http://dev.maxmind.com/geoip/legacy/codes/iso3166/"""
  2090. try:
  2091. return self.region_cache[hostname]
  2092. except KeyError:
  2093. pass
  2094. try:
  2095. if re.match(r'^\d+\.\d+\.\d+\.\d+$', hostname) or ':' in hostname:
  2096. iplist = [hostname]
  2097. elif dnsservers:
  2098. iplist = dnslib_record2iplist(dnslib_resolve_over_udp(hostname, dnsservers, timeout=2))
  2099. else:
  2100. iplist = socket.gethostbyname_ex(hostname)[-1]
  2101. country_code = self.geoip.country_code_by_addr(iplist[0])
  2102. except StandardError as e:
  2103. logging.warning('DirectRegionFilter cannot determine region for hostname=%r %r', hostname, e)
  2104. country_code = ''
  2105. self.region_cache[hostname] = country_code
  2106. return country_code
  2107. def filter(self, handler):
  2108. if self.geoip:
  2109. country_code = self.get_country_code(handler.host, handler.dns_servers)
  2110. if country_code in common.GAE_REGIONS:
  2111. if handler.command == 'CONNECT':
  2112. return [handler.FORWARD, handler.host, handler.port, handler.connect_timeout]
  2113. else:
  2114. return [handler.DIRECT, {}]
  2115. class AutoRangeFilter(BaseProxyHandlerFilter):
  2116. """force https filter"""
  2117. def filter(self, handler):
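# Decide whether this request should be auto-ranged: match the host and extension rules,
# then rewrite (or add) the Range header so the first chunk is bounded by AUTORANGE_MAXSIZE.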
  2118. path = urlparse.urlsplit(handler.path).path
  2119. need_autorange = any(x(handler.host) for x in common.AUTORANGE_HOSTS_MATCH) or path.endswith(common.AUTORANGE_ENDSWITH)
  2120. if path.endswith(common.AUTORANGE_NOENDSWITH) or 'range=' in urlparse.urlsplit(path).query or handler.command == 'HEAD':
  2121. need_autorange = False
  2122. if handler.command != 'HEAD' and handler.headers.get('Range'):
  2123. m = re.search(r'bytes=(\d+)-', handler.headers['Range'])
  2124. start = int(m.group(1) if m else 0)
  2125. handler.headers['Range'] = 'bytes=%d-%d' % (start, start+common.AUTORANGE_MAXSIZE-1)
  2126. logging.info('autorange range=%r match url=%r', handler.headers['Range'], handler.path)
  2127. elif need_autorange:
  2128. logging.info('Found [autorange]endswith match url=%r', handler.path)
  2129. m = re.search(r'bytes=(\d+)-', handler.headers.get('Range', ''))
  2130. start = int(m.group(1) if m else 0)
  2131. handler.headers['Range'] = 'bytes=%d-%d' % (start, start+common.AUTORANGE_MAXSIZE-1)
  2132. class GAEFetchFilter(BaseProxyHandlerFilter):
  2133. """force https filter"""
  2134. def filter(self, handler):
  2135. """https://developers.google.com/appengine/docs/python/urlfetch/"""
  2136. if handler.command == 'CONNECT':
  2137. do_ssl_handshake = 440 <= handler.port <= 450 or 1024 <= handler.port <= 65535
  2138. return [handler.STRIP, do_ssl_handshake, self if not common.URLRE_MAP else None]
  2139. elif handler.command in ('GET', 'POST', 'HEAD', 'PUT', 'DELETE', 'PATCH'):
  2140. kwargs = {}
  2141. if common.GAE_PASSWORD:
  2142. kwargs['password'] = common.GAE_PASSWORD
  2143. if common.GAE_VALIDATE:
  2144. kwargs['validate'] = 1
  2145. fetchservers = ['%s://%s.appspot.com%s' % (common.GAE_MODE, x, common.GAE_PATH) for x in common.GAE_APPIDS]
  2146. return [handler.URLFETCH, fetchservers, common.FETCHMAX_LOCAL, kwargs]
  2147. else:
  2148. if common.PHP_ENABLE:
  2149. return PHPProxyHandler.handler_filters[-1].filter(handler)
  2150. else:
  2151. logging.warning('"%s %s" not supported by GAE, please enable PHP mode!', handler.command, handler.host)
  2152. return [handler.DIRECT, {}]
  2153. class GAEProxyHandler(AdvancedProxyHandler):
  2154. """GAE Proxy Handler"""
  2155. handler_filters = [UserAgentFilter(), WithGAEFilter(), FakeHttpsFilter(), ForceHttpsFilter(), HostsFilter(), DirectRegionFilter(), AutoRangeFilter(), GAEFetchFilter()]
  2156. def first_run(self):
  2157. """GAEProxyHandler setup, init domain/iplist map"""
  2158. if not common.PROXY_ENABLE:
  2159. logging.info('resolve common.IPLIST_MAP names=%s to iplist', list(common.IPLIST_MAP))
  2160. common.resolve_iplist()
  2161. random.shuffle(common.GAE_APPIDS)
  2162. for appid in common.GAE_APPIDS:
  2163. host = '%s.appspot.com' % appid
  2164. if host not in common.HOST_MAP:
  2165. common.HOST_MAP[host] = common.HOST_POSTFIX_MAP['.appspot.com']
  2166. if host not in self.dns_cache:
  2167. self.dns_cache[host] = common.IPLIST_MAP[common.HOST_MAP[host]]
  2168. if common.GAE_PAGESPEED:
  2169. for i in xrange(1, 10):
  2170. host = '%d-ps.googleusercontent.com' % i
  2171. if host not in common.HOST_MAP:
  2172. common.HOST_MAP[host] = common.HOST_POSTFIX_MAP['.googleusercontent.com']
  2173. if host not in self.dns_cache:
  2174. self.dns_cache[host] = common.IPLIST_MAP[common.HOST_MAP[host]]
  2175. def handle_urlfetch_error(self, fetchserver, response):
  2176. gae_appid = urlparse.urlsplit(fetchserver).netloc.split('.')[-3]
  2177. if response.app_status == 503:
2178. # appid over quota, switch to next appid
  2179. if gae_appid == common.GAE_APPIDS[0] and len(common.GAE_APPIDS) > 1:
  2180. common.GAE_APPIDS.append(common.GAE_APPIDS.pop(0))
2181. logging.info('gae_appid=%r over quota, switch to next appid=%r', gae_appid, common.GAE_APPIDS[0])
  2182. class PHPFetchFilter(BaseProxyHandlerFilter):
  2183. """force https filter"""
  2184. def filter(self, handler):
  2185. if handler.command == 'CONNECT':
  2186. return [handler.STRIP, True, self]
  2187. else:
  2188. kwargs = {}
  2189. if common.PHP_PASSWORD:
  2190. kwargs['password'] = common.PHP_PASSWORD
  2191. if common.PHP_VALIDATE:
  2192. kwargs['validate'] = 1
  2193. return [handler.URLFETCH, [common.PHP_FETCHSERVER], 1, kwargs]
  2194. class PHPProxyHandler(AdvancedProxyHandler):
  2195. """PHP Proxy Handler"""
  2196. first_run_lock = threading.Lock()
  2197. handler_filters = [UserAgentFilter(), FakeHttpsFilter(), ForceHttpsFilter(), PHPFetchFilter()]
  2198. def first_run(self):
  2199. if common.PHP_USEHOSTS:
  2200. self.handler_filters.insert(-1, HostsFilter())
  2201. if not common.PROXY_ENABLE:
  2202. common.resolve_iplist()
  2203. fetchhost = re.sub(r':\d+$', '', urlparse.urlsplit(common.PHP_FETCHSERVER).netloc)
  2204. logging.info('resolve common.PHP_FETCHSERVER domain=%r to iplist', fetchhost)
  2205. if common.PHP_USEHOSTS and fetchhost in common.HOST_MAP:
  2206. hostname = common.HOST_MAP[fetchhost]
  2207. fetchhost_iplist = sum([socket.gethostbyname_ex(x)[-1] for x in common.IPLIST_MAP.get(hostname) or hostname.split('|')], [])
  2208. else:
  2209. fetchhost_iplist = self.gethostbyname2(fetchhost)
  2210. if len(fetchhost_iplist) == 0:
2211. logging.error('resolve %r domain returned empty iplist! please use an ip list to replace the domain list!', fetchhost)
  2212. sys.exit(-1)
  2213. self.dns_cache[fetchhost] = list(set(fetchhost_iplist))
  2214. logging.info('resolve common.PHP_FETCHSERVER domain to iplist=%r', fetchhost_iplist)
  2215. return True
  2216. class ProxyChainMixin:
  2217. """proxy chain mixin"""
  2218. def gethostbyname2(self, hostname):
  2219. try:
  2220. return socket.gethostbyname_ex(hostname)[-1]
  2221. except socket.error:
  2222. return [hostname]
  2223. def create_tcp_connection(self, hostname, port, timeout, **kwargs):
  2224. sock = socket.create_connection((common.PROXY_HOST, int(common.PROXY_PORT)))
  2225. if hostname.endswith('.appspot.com'):
  2226. hostname = 'www.google.com'
  2227. request_data = 'CONNECT %s:%s HTTP/1.1\r\n' % (hostname, port)
  2228. if common.PROXY_USERNAME and common.PROXY_PASSWROD:
  2229. request_data += 'Proxy-Authorization: Basic %s\r\n' % base64.b64encode(('%s:%s' % (common.PROXY_USERNAME, common.PROXY_PASSWROD)).encode()).decode().strip()
  2230. request_data += '\r\n'
  2231. sock.sendall(request_data)
  2232. response = httplib.HTTPResponse(sock)
  2233. response.fp.close()
  2234. response.fp = sock.makefile('rb', 0)
  2235. response.begin()
  2236. if response.status >= 400:
  2237. raise httplib.BadStatusLine('%s %s %s' % (response.version, response.status, response.reason))
  2238. return sock
  2239. def create_ssl_connection(self, hostname, port, timeout, **kwargs):
  2240. sock = self.create_tcp_connection(hostname, port, timeout, **kwargs)
  2241. ssl_sock = ssl.wrap_socket(sock)
  2242. return ssl_sock
  2243. class GreenForwardMixin:
  2244. """green forward mixin"""
  2245. @staticmethod
  2246. def io_copy(dest, source, timeout, bufsize):
  2247. try:
  2248. dest.settimeout(timeout)
  2249. source.settimeout(timeout)
  2250. while 1:
  2251. data = source.recv(bufsize)
  2252. if not data:
  2253. break
  2254. dest.sendall(data)
  2255. except socket.timeout:
  2256. pass
  2257. except NetWorkIOError as e:
  2258. if e.args[0] not in (errno.ECONNABORTED, errno.ECONNRESET, errno.ENOTCONN, errno.EPIPE):
  2259. raise
  2260. if e.args[0] in (errno.EBADF,):
  2261. return
  2262. finally:
  2263. for sock in (dest, source):
  2264. try:
  2265. sock.close()
  2266. except StandardError:
  2267. pass
  2268. def forward_socket(self, local, remote, timeout):
  2269. """forward socket"""
  2270. bufsize = self.bufsize
  2271. thread.start_new_thread(GreenForwardMixin.io_copy, (remote.dup(), local.dup(), timeout, bufsize))
  2272. GreenForwardMixin.io_copy(local, remote, timeout, bufsize)
  2273. class ProxyChainGAEProxyHandler(ProxyChainMixin, GAEProxyHandler):
  2274. pass
  2275. class ProxyChainPHPProxyHandler(ProxyChainMixin, PHPProxyHandler):
  2276. pass
  2277. class GreenForwardGAEProxyHandler(GreenForwardMixin, GAEProxyHandler):
  2278. pass
  2279. class GreenForwardPHPProxyHandler(GreenForwardMixin, PHPProxyHandler):
  2280. pass
  2281. class ProxyChainGreenForwardGAEProxyHandler(ProxyChainMixin, GreenForwardGAEProxyHandler):
  2282. pass
  2283. class ProxyChainGreenForwardPHPProxyHandler(ProxyChainMixin, GreenForwardPHPProxyHandler):
  2284. pass
  2285. def get_uptime():
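# Best-effort system uptime in seconds: GetTickCount64/GetTickCount on Windows,
# /proc/uptime on Linux, or parsing the output of the `uptime` command elsewhere.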
  2286. if os.name == 'nt':
  2287. import ctypes
  2288. try:
  2289. tick = ctypes.windll.kernel32.GetTickCount64()
  2290. except AttributeError:
  2291. tick = ctypes.windll.kernel32.GetTickCount()
  2292. return tick / 1000.0
  2293. elif os.path.isfile('/proc/uptime'):
  2294. with open('/proc/uptime', 'rb') as fp:
  2295. uptime = fp.readline().strip().split()[0].strip()
  2296. return float(uptime)
  2297. elif any(os.path.isfile(os.path.join(x, 'uptime')) for x in os.environ['PATH'].split(os.pathsep)):
  2298. # http://www.opensource.apple.com/source/lldb/lldb-69/test/pexpect-2.4/examples/uptime.py
  2299. pattern = r'up\s+(.*?),\s+([0-9]+) users?,\s+load averages?: ([0-9]+\.[0-9][0-9]),?\s+([0-9]+\.[0-9][0-9]),?\s+([0-9]+\.[0-9][0-9])'
  2300. output = os.popen('uptime').read()
  2301. duration, _, _, _, _ = re.search(pattern, output).groups()
  2302. days, hours, mins = 0, 0, 0
  2303. if 'day' in duration:
  2304. m = re.search(r'([0-9]+)\s+day', duration)
  2305. days = int(m.group(1))
  2306. if ':' in duration:
  2307. m = re.search(r'([0-9]+):([0-9]+)', duration)
  2308. hours = int(m.group(1))
  2309. mins = int(m.group(2))
  2310. if 'min' in duration:
  2311. m = re.search(r'([0-9]+)\s+min', duration)
  2312. mins = int(m.group(1))
  2313. return days * 86400 + hours * 3600 + mins * 60
  2314. else:
  2315. #TODO: support other platforms
  2316. return None
  2317. class PacUtil(object):
  2318. """GoAgent Pac Util"""
  2319. @staticmethod
  2320. def update_pacfile(filename):
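# Regenerate the PAC file: patch the proxy placeholders in the existing file, then append
# rules converted from the configured adblock list (adblock2pac) and the gfwlist
# (autoproxy2pac_lite), downloading both through the local proxy itself.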
  2321. listen_ip = '127.0.0.1'
  2322. autoproxy = '%s:%s' % (listen_ip, common.LISTEN_PORT)
  2323. blackhole = '%s:%s' % (listen_ip, common.PAC_PORT)
  2324. default = 'PROXY %s:%s' % (common.PROXY_HOST, common.PROXY_PORT) if common.PROXY_ENABLE else 'DIRECT'
  2325. opener = urllib2.build_opener(urllib2.ProxyHandler({'http': autoproxy, 'https': autoproxy}))
  2326. content = ''
  2327. need_update = True
  2328. with open(filename, 'rb') as fp:
  2329. content = fp.read()
  2330. try:
  2331. placeholder = '// AUTO-GENERATED RULES, DO NOT MODIFY!'
  2332. content = content[:content.index(placeholder)+len(placeholder)]
  2333. content = re.sub(r'''blackhole\s*=\s*['"]PROXY [\.\w:]+['"]''', 'blackhole = \'PROXY %s\'' % blackhole, content)
  2334. content = re.sub(r'''autoproxy\s*=\s*['"]PROXY [\.\w:]+['"]''', 'autoproxy = \'PROXY %s\'' % autoproxy, content)
  2335. content = re.sub(r'''defaultproxy\s*=\s*['"](DIRECT|PROXY [\.\w:]+)['"]''', 'defaultproxy = \'%s\'' % default, content)
  2336. content = re.sub(r'''host\s*==\s*['"][\.\w:]+['"]\s*\|\|\s*isPlainHostName''', 'host == \'%s\' || isPlainHostName' % listen_ip, content)
  2337. if content.startswith('//'):
  2338. line = '// Proxy Auto-Config file generated by autoproxy2pac, %s\r\n' % time.strftime('%Y-%m-%d %H:%M:%S')
  2339. content = line + '\r\n'.join(content.splitlines()[1:])
  2340. except ValueError:
  2341. need_update = False
  2342. try:
  2343. if common.PAC_ADBLOCK:
  2344. admode = common.PAC_ADMODE
  2345. logging.info('try download %r to update_pacfile(%r)', common.PAC_ADBLOCK, filename)
  2346. adblock_content = opener.open(common.PAC_ADBLOCK).read()
  2347. logging.info('%r downloaded, try convert it with adblock2pac', common.PAC_ADBLOCK)
  2348. if 'gevent' in sys.modules and time.sleep is getattr(sys.modules['gevent'], 'sleep', None) and hasattr(gevent.get_hub(), 'threadpool'):
  2349. jsrule = gevent.get_hub().threadpool.apply_e(Exception, PacUtil.adblock2pac, (adblock_content, 'FindProxyForURLByAdblock', blackhole, default, admode))
  2350. else:
  2351. jsrule = PacUtil.adblock2pac(adblock_content, 'FindProxyForURLByAdblock', blackhole, default, admode)
  2352. content += '\r\n' + jsrule + '\r\n'
  2353. logging.info('%r downloaded and parsed', common.PAC_ADBLOCK)
  2354. else:
  2355. content += '\r\nfunction FindProxyForURLByAdblock(url, host) {return "DIRECT";}\r\n'
  2356. except StandardError as e:
  2357. need_update = False
  2358. logging.exception('update_pacfile failed: %r', e)
  2359. try:
2360. logging.info('trying to download %r to update_pacfile(%r)', common.PAC_GFWLIST, filename)
  2361. autoproxy_content = base64.b64decode(opener.open(common.PAC_GFWLIST).read())
2362. logging.info('%r downloaded, trying to convert it with autoproxy2pac_lite', common.PAC_GFWLIST)
  2363. if 'gevent' in sys.modules and time.sleep is getattr(sys.modules['gevent'], 'sleep', None) and hasattr(gevent.get_hub(), 'threadpool'):
  2364. jsrule = gevent.get_hub().threadpool.apply_e(Exception, PacUtil.autoproxy2pac_lite, (autoproxy_content, 'FindProxyForURLByAutoProxy', autoproxy, default))
  2365. else:
  2366. jsrule = PacUtil.autoproxy2pac_lite(autoproxy_content, 'FindProxyForURLByAutoProxy', autoproxy, default)
  2367. content += '\r\n' + jsrule + '\r\n'
  2368. logging.info('%r downloaded and parsed', common.PAC_GFWLIST)
  2369. except StandardError as e:
  2370. need_update = False
  2371. logging.exception('update_pacfile failed: %r', e)
  2372. if need_update:
  2373. with open(filename, 'wb') as fp:
  2374. fp.write(content)
  2375. logging.info('%r successfully updated', filename)
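# Illustrative usage sketch (not part of the original source): update_pacfile() blocks
# on network downloads, so callers such as PacFileFilter run it in a background thread.
# The filename 'proxy.pac' is only an example; the real value comes from common.PAC_FILE.
#
#     import thread
#     thread.start_new_thread(PacUtil.update_pacfile, ('proxy.pac',))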
  2376. @staticmethod
  2377. def autoproxy2pac(content, func_name='FindProxyForURLByAutoProxy', proxy='127.0.0.1:8087', default='DIRECT', indent=4):
  2378. """Autoproxy to Pac, based on https://github.com/iamamac/autoproxy2pac"""
  2379. jsLines = []
  2380. for line in content.splitlines()[1:]:
  2381. if line and not line.startswith("!"):
  2382. use_proxy = True
  2383. if line.startswith("@@"):
  2384. line = line[2:]
  2385. use_proxy = False
  2386. return_proxy = 'PROXY %s' % proxy if use_proxy else default
  2387. if line.startswith('/') and line.endswith('/'):
  2388. jsLine = 'if (/%s/i.test(url)) return "%s";' % (line[1:-1], return_proxy)
  2389. elif line.startswith('||'):
  2390. domain = line[2:].lstrip('.')
  2391. if len(jsLines) > 0 and ('host.indexOf(".%s") >= 0' % domain in jsLines[-1] or 'host.indexOf("%s") >= 0' % domain in jsLines[-1]):
  2392. jsLines.pop()
  2393. jsLine = 'if (dnsDomainIs(host, ".%s") || host == "%s") return "%s";' % (domain, domain, return_proxy)
  2394. elif line.startswith('|'):
  2395. jsLine = 'if (url.indexOf("%s") == 0) return "%s";' % (line[1:], return_proxy)
  2396. elif '*' in line:
  2397. jsLine = 'if (shExpMatch(url, "*%s*")) return "%s";' % (line.strip('*'), return_proxy)
  2398. elif '/' not in line:
  2399. jsLine = 'if (host.indexOf("%s") >= 0) return "%s";' % (line, return_proxy)
  2400. else:
  2401. jsLine = 'if (url.indexOf("%s") >= 0) return "%s";' % (line, return_proxy)
  2402. jsLine = ' ' * indent + jsLine
  2403. if use_proxy:
  2404. jsLines.append(jsLine)
  2405. else:
  2406. jsLines.insert(0, jsLine)
  2407. function = 'function %s(url, host) {\r\n%s\r\n%sreturn "%s";\r\n}' % (func_name, '\n'.join(jsLines), ' '*indent, default)
  2408. return function
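# Illustrative sketch (not part of the original source): a '||' rule such as
# '||example.com' becomes a dnsDomainIs/host check in the generated function, e.g.
#
#     if (dnsDomainIs(host, ".example.com") || host == "example.com") return "PROXY 127.0.0.1:8087";
#
# '@@' exception rules are inserted at the top of the function so they take precedence.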
  2409. @staticmethod
  2410. def autoproxy2pac_lite(content, func_name='FindProxyForURLByAutoProxy', proxy='127.0.0.1:8087', default='DIRECT', indent=4):
  2411. """Autoproxy to Pac, based on https://github.com/iamamac/autoproxy2pac"""
  2412. direct_domain_set = set([])
  2413. proxy_domain_set = set([])
  2414. for line in content.splitlines()[1:]:
  2415. if line and not line.startswith(('!', '|!', '||!')):
  2416. use_proxy = True
  2417. if line.startswith("@@"):
  2418. line = line[2:]
  2419. use_proxy = False
  2420. domain = ''
  2421. if line.startswith('/') and line.endswith('/'):
  2422. line = line[1:-1]
  2423. if line.startswith('^https?:\\/\\/[^\\/]+') and re.match(r'^(\w|\\\-|\\\.)+$', line[18:]):
  2424. domain = line[18:].replace(r'\.', '.')
  2425. else:
2426. logging.warning('unsupported gfwlist regex: %r', line)
  2427. elif line.startswith('||'):
  2428. domain = line[2:].lstrip('*').rstrip('/')
  2429. elif line.startswith('|'):
  2430. domain = urlparse.urlsplit(line[1:]).netloc.lstrip('*')
  2431. elif line.startswith(('http://', 'https://')):
  2432. domain = urlparse.urlsplit(line).netloc.lstrip('*')
  2433. elif re.search(r'^([\w\-\_\.]+)([\*\/]|$)', line):
  2434. domain = re.split(r'[\*\/]', line)[0]
  2435. else:
  2436. pass
  2437. if '*' in domain:
  2438. domain = domain.split('*')[-1]
  2439. if not domain or re.match(r'^\w+$', domain):
2440. logging.debug('unsupported gfwlist rule: %r', line)
  2441. continue
  2442. if use_proxy:
  2443. proxy_domain_set.add(domain)
  2444. else:
  2445. direct_domain_set.add(domain)
  2446. proxy_domain_list = sorted(set(x.lstrip('.') for x in proxy_domain_set))
  2447. autoproxy_host = ',\r\n'.join('%s"%s": 1' % (' '*indent, x) for x in proxy_domain_list)
  2448. template = '''\
  2449. var autoproxy_host = {
  2450. %(autoproxy_host)s
  2451. };
  2452. function %(func_name)s(url, host) {
  2453. var lastPos;
  2454. do {
  2455. if (autoproxy_host.hasOwnProperty(host)) {
  2456. return 'PROXY %(proxy)s';
  2457. }
  2458. lastPos = host.indexOf('.') + 1;
  2459. host = host.slice(lastPos);
  2460. } while (lastPos >= 1);
  2461. return '%(default)s';
  2462. }'''
  2463. template = re.sub(r'(?m)^\s{%d}' % min(len(re.search(r' +', x).group()) for x in template.splitlines()), '', template)
  2464. template_args = {'autoproxy_host': autoproxy_host,
  2465. 'func_name': func_name,
  2466. 'proxy': proxy,
  2467. 'default': default}
  2468. return template % template_args
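# Illustrative usage sketch (not part of the original source); the rule text is made up.
#
#     rules = '[AutoProxy 0.2.9]\n||example.com\n@@||intranet.example\n'
#     js = PacUtil.autoproxy2pac_lite(rules, 'FindProxyForURLByAutoProxy', '127.0.0.1:8087', 'DIRECT')
#     # js defines autoproxy_host = {"example.com": 1} plus a lookup loop that strips
#     # one leading label from the host per iteration until a match is found.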
  2469. @staticmethod
  2470. def urlfilter2pac(content, func_name='FindProxyForURLByUrlfilter', proxy='127.0.0.1:8086', default='DIRECT', indent=4):
  2471. """urlfilter.ini to Pac, based on https://github.com/iamamac/autoproxy2pac"""
  2472. jsLines = []
  2473. for line in content[content.index('[exclude]'):].splitlines()[1:]:
  2474. if line and not line.startswith(';'):
  2475. use_proxy = True
  2476. if line.startswith("@@"):
  2477. line = line[2:]
  2478. use_proxy = False
  2479. return_proxy = 'PROXY %s' % proxy if use_proxy else default
  2480. if '*' in line:
  2481. jsLine = 'if (shExpMatch(url, "%s")) return "%s";' % (line, return_proxy)
  2482. else:
  2483. jsLine = 'if (url == "%s") return "%s";' % (line, return_proxy)
  2484. jsLine = ' ' * indent + jsLine
  2485. if use_proxy:
  2486. jsLines.append(jsLine)
  2487. else:
  2488. jsLines.insert(0, jsLine)
  2489. function = 'function %s(url, host) {\r\n%s\r\n%sreturn "%s";\r\n}' % (func_name, '\n'.join(jsLines), ' '*indent, default)
  2490. return function
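# Illustrative sketch (not part of the original source): urlfilter2pac only reads the
# '[exclude]' section, so a hypothetical entry like 'http://ads.example.com/*' becomes
#
#     if (shExpMatch(url, "http://ads.example.com/*")) return "PROXY 127.0.0.1:8086";
#
# while '@@'-prefixed entries are emitted first with the default (DIRECT) action.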
  2491. @staticmethod
  2492. def adblock2pac(content, func_name='FindProxyForURLByAdblock', proxy='127.0.0.1:8086', default='DIRECT', admode=1, indent=4):
  2493. """adblock list to Pac, based on https://github.com/iamamac/autoproxy2pac"""
  2494. white_conditions = {'host': [], 'url.indexOf': [], 'shExpMatch': []}
  2495. black_conditions = {'host': [], 'url.indexOf': [], 'shExpMatch': []}
  2496. for line in content.splitlines()[1:]:
  2497. if not line or line.startswith('!') or '##' in line or '#@#' in line:
  2498. continue
  2499. use_proxy = True
  2500. use_start = False
  2501. use_end = False
  2502. use_domain = False
  2503. use_postfix = []
  2504. if '$' in line:
  2505. posfixs = line.split('$')[-1].split(',')
  2506. if any('domain' in x for x in posfixs):
  2507. continue
  2508. if 'image' in posfixs:
  2509. use_postfix += ['.jpg', '.gif']
  2510. elif 'script' in posfixs:
  2511. use_postfix += ['.js']
  2512. else:
  2513. continue
  2514. line = line.split('$')[0]
  2515. if line.startswith("@@"):
  2516. line = line[2:]
  2517. use_proxy = False
  2518. if '||' == line[:2]:
  2519. line = line[2:]
  2520. if '/' not in line:
  2521. use_domain = True
  2522. else:
  2523. use_start = True
  2524. elif '|' == line[0]:
  2525. line = line[1:]
  2526. use_start = True
  2527. if line[-1] in ('^', '|'):
  2528. line = line[:-1]
  2529. if not use_postfix:
  2530. use_end = True
  2531. line = line.replace('^', '*').strip('*')
  2532. conditions = black_conditions if use_proxy else white_conditions
  2533. if use_start and use_end:
  2534. conditions['shExpMatch'] += ['*%s*' % line]
  2535. elif use_start:
  2536. if '*' in line:
  2537. if use_postfix:
  2538. conditions['shExpMatch'] += ['*%s*%s' % (line, x) for x in use_postfix]
  2539. else:
  2540. conditions['shExpMatch'] += ['*%s*' % line]
  2541. else:
  2542. conditions['url.indexOf'] += [line]
  2543. elif use_domain and use_end:
  2544. if '*' in line:
  2545. conditions['shExpMatch'] += ['%s*' % line]
  2546. else:
  2547. conditions['host'] += [line]
  2548. elif use_domain:
  2549. if line.split('/')[0].count('.') <= 1:
  2550. if use_postfix:
  2551. conditions['shExpMatch'] += ['*.%s*%s' % (line, x) for x in use_postfix]
  2552. else:
  2553. conditions['shExpMatch'] += ['*.%s*' % line]
  2554. else:
  2555. if '*' in line:
  2556. if use_postfix:
  2557. conditions['shExpMatch'] += ['*%s*%s' % (line, x) for x in use_postfix]
  2558. else:
  2559. conditions['shExpMatch'] += ['*%s*' % line]
  2560. else:
  2561. if use_postfix:
  2562. conditions['shExpMatch'] += ['*%s*%s' % (line, x) for x in use_postfix]
  2563. else:
  2564. conditions['url.indexOf'] += ['http://%s' % line]
  2565. else:
  2566. if use_postfix:
  2567. conditions['shExpMatch'] += ['*%s*%s' % (line, x) for x in use_postfix]
  2568. else:
  2569. conditions['shExpMatch'] += ['*%s*' % line]
  2570. templates = ['''\
  2571. function %(func_name)s(url, host) {
  2572. return '%(default)s';
  2573. }''',
  2574. '''\
  2575. var blackhole_host = {
  2576. %(blackhole_host)s
  2577. };
  2578. function %(func_name)s(url, host) {
2579. // untrusted Adblock Plus list, disable whitelist until chinalist comes back.
  2580. if (blackhole_host.hasOwnProperty(host)) {
  2581. return 'PROXY %(proxy)s';
  2582. }
  2583. return '%(default)s';
  2584. }''',
  2585. '''\
  2586. var blackhole_host = {
  2587. %(blackhole_host)s
  2588. };
  2589. var blackhole_url_indexOf = [
  2590. %(blackhole_url_indexOf)s
  2591. ];
2592. function %(func_name)s(url, host) {
2593. // untrusted Adblock Plus list, disable whitelist until chinalist comes back.
  2594. if (blackhole_host.hasOwnProperty(host)) {
  2595. return 'PROXY %(proxy)s';
  2596. }
  2597. for (i = 0; i < blackhole_url_indexOf.length; i++) {
  2598. if (url.indexOf(blackhole_url_indexOf[i]) >= 0) {
  2599. return 'PROXY %(proxy)s';
  2600. }
  2601. }
  2602. return '%(default)s';
  2603. }''',
  2604. '''\
  2605. var blackhole_host = {
  2606. %(blackhole_host)s
  2607. };
  2608. var blackhole_url_indexOf = [
  2609. %(blackhole_url_indexOf)s
  2610. ];
  2611. var blackhole_shExpMatch = [
  2612. %(blackhole_shExpMatch)s
  2613. ];
  2614. function %(func_name)s(url, host) {
2615. // untrusted Adblock Plus list, disable whitelist until chinalist comes back.
  2616. if (blackhole_host.hasOwnProperty(host)) {
  2617. return 'PROXY %(proxy)s';
  2618. }
  2619. for (i = 0; i < blackhole_url_indexOf.length; i++) {
  2620. if (url.indexOf(blackhole_url_indexOf[i]) >= 0) {
  2621. return 'PROXY %(proxy)s';
  2622. }
  2623. }
  2624. for (i = 0; i < blackhole_shExpMatch.length; i++) {
  2625. if (shExpMatch(url, blackhole_shExpMatch[i])) {
  2626. return 'PROXY %(proxy)s';
  2627. }
  2628. }
  2629. return '%(default)s';
  2630. }''']
  2631. template = re.sub(r'(?m)^\s{%d}' % min(len(re.search(r' +', x).group()) for x in templates[admode].splitlines()), '', templates[admode])
  2632. template_kwargs = {'blackhole_host': ',\r\n'.join("%s'%s': 1" % (' '*indent, x) for x in sorted(black_conditions['host'])),
  2633. 'blackhole_url_indexOf': ',\r\n'.join("%s'%s'" % (' '*indent, x) for x in sorted(black_conditions['url.indexOf'])),
  2634. 'blackhole_shExpMatch': ',\r\n'.join("%s'%s'" % (' '*indent, x) for x in sorted(black_conditions['shExpMatch'])),
  2635. 'func_name': func_name,
  2636. 'proxy': proxy,
  2637. 'default': default}
  2638. return template % template_kwargs
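# Illustrative usage sketch (not part of the original source); the filter text is made up.
#
#     adrules = '[Adblock Plus 2.0]\n||ads.example.com^\n'
#     js = PacUtil.adblock2pac(adrules, 'FindProxyForURLByAdblock', '127.0.0.1:8086', 'DIRECT', admode=1)
#     # admode selects one of the templates above: 0 ignores the list entirely,
#     # 1 uses only host lookups, 2 adds url.indexOf checks, 3 adds shExpMatch patterns.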
  2639. class PacFileFilter(BaseProxyHandlerFilter):
  2640. """pac file filter"""
  2641. def filter(self, handler):
  2642. is_local_client = handler.client_address[0] in ('127.0.0.1', '::1')
  2643. pacfile = os.path.join(os.path.dirname(os.path.abspath(__file__)), common.PAC_FILE)
  2644. urlparts = urlparse.urlsplit(handler.path)
  2645. if handler.command == 'GET' and urlparts.path.lstrip('/') == common.PAC_FILE:
  2646. if urlparts.query == 'flush':
  2647. if is_local_client:
  2648. thread.start_new_thread(PacUtil.update_pacfile, (pacfile,))
  2649. else:
  2650. return [handler.MOCK, 403, {'Content-Type': 'text/plain'}, 'client address %r not allowed' % handler.client_address[0]]
  2651. if time.time() - os.path.getmtime(pacfile) > common.PAC_EXPIRED:
  2652. # check system uptime > 30 minutes
  2653. uptime = get_uptime()
  2654. if uptime and uptime > 1800:
  2655. thread.start_new_thread(lambda: os.utime(pacfile, (time.time(), time.time())) or PacUtil.update_pacfile(pacfile), tuple())
  2656. with open(pacfile, 'rb') as fp:
  2657. content = fp.read()
  2658. if not is_local_client:
  2659. listen_ip = ProxyUtil.get_listen_ip()
  2660. content = content.replace('127.0.0.1', listen_ip)
  2661. headers = {'Content-Type': 'text/plain'}
  2662. if 'gzip' in handler.headers.get('Accept-Encoding', ''):
  2663. headers['Content-Encoding'] = 'gzip'
  2664. compressobj = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -zlib.MAX_WBITS, zlib.DEF_MEM_LEVEL, 0)
  2665. dataio = io.BytesIO()
  2666. dataio.write('\x1f\x8b\x08\x00\x00\x00\x00\x00\x02\xff')
  2667. dataio.write(compressobj.compress(content))
  2668. dataio.write(compressobj.flush())
  2669. dataio.write(struct.pack('<LL', zlib.crc32(content) & 0xFFFFFFFFL, len(content) & 0xFFFFFFFFL))
  2670. content = dataio.getvalue()
  2671. return [handler.MOCK, 200, headers, content]
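# Illustrative usage sketch (not part of the original source): with the PAC server
# listening locally, the file can be fetched or force-refreshed like this. The port
# 8086 and the name proxy.pac are examples; the real values come from the [pac] config.
#
#     import urllib2
#     pac = urllib2.urlopen('http://127.0.0.1:8086/proxy.pac').read()
#     urllib2.urlopen('http://127.0.0.1:8086/proxy.pac?flush')  # localhost only; rebuilds in background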
  2672. class StaticFileFilter(BaseProxyHandlerFilter):
  2673. """static file filter"""
  2674. index_file = 'index.html'
  2675. def format_index_html(self, dirname):
  2676. INDEX_TEMPLATE = u'''
  2677. <html>
  2678. <title>Directory listing for $dirname</title>
  2679. <body>
  2680. <h2>Directory listing for $dirname</h2>
  2681. <hr>
  2682. <ul>
  2683. $html
  2684. </ul>
  2685. <hr>
  2686. </body></html>
  2687. '''
  2688. html = ''
  2689. if not isinstance(dirname, unicode):
  2690. dirname = dirname.decode(sys.getfilesystemencoding())
  2691. for name in os.listdir(dirname):
  2692. fullname = os.path.join(dirname, name)
  2693. suffix = u'/' if os.path.isdir(fullname) else u''
  2694. html += u'<li><a href="%s%s">%s%s</a>\r\n' % (name, suffix, name, suffix)
  2695. return string.Template(INDEX_TEMPLATE).substitute(dirname=dirname, html=html)
  2696. def filter(self, handler):
  2697. path = urlparse.urlsplit(handler.path).path
  2698. if path.startswith('/'):
  2699. path = urllib.unquote_plus(path.lstrip('/') or '.').decode('utf8')
  2700. if os.path.isdir(path):
  2701. index_file = os.path.join(path, self.index_file)
  2702. if not os.path.isfile(index_file):
  2703. content = self.format_index_html(path).encode('UTF-8')
  2704. headers = {'Content-Type': 'text/html; charset=utf-8', 'Connection': 'close'}
  2705. return [handler.MOCK, 200, headers, content]
  2706. else:
  2707. path = index_file
  2708. if os.path.isfile(path):
  2709. content_type = 'application/octet-stream'
  2710. try:
  2711. import mimetypes
2712. content_type = mimetypes.types_map.get(os.path.splitext(path)[1], 'application/octet-stream')
  2713. except StandardError as e:
  2714. logging.error('import mimetypes failed: %r', e)
  2715. with open(path, 'rb') as fp:
  2716. content = fp.read()
  2717. headers = {'Connection': 'close', 'Content-Type': content_type}
  2718. return [handler.MOCK, 200, headers, content]
  2719. class BlackholeFilter(BaseProxyHandlerFilter):
  2720. """blackhole filter"""
  2721. one_pixel_gif = 'GIF89a\x01\x00\x01\x00\x80\xff\x00\xc0\xc0\xc0\x00\x00\x00!\xf9\x04\x01\x00\x00\x00\x00,\x00\x00\x00\x00\x01\x00\x01\x00\x00\x02\x02D\x01\x00;'
  2722. def filter(self, handler):
  2723. if handler.command == 'CONNECT':
  2724. return [handler.STRIP, True, self]
  2725. elif handler.path.startswith(('http://', 'https://')):
  2726. headers = {'Cache-Control': 'max-age=86400',
2727. 'Expires': 'Sun, 01 Aug 2100 00:00:00 GMT',
  2728. 'Connection': 'close'}
  2729. content = ''
  2730. if urlparse.urlsplit(handler.path).path.lower().endswith(('.jpg', '.gif', '.png','.jpeg', '.bmp')):
  2731. headers['Content-Type'] = 'image/gif'
  2732. content = self.one_pixel_gif
  2733. return [handler.MOCK, 200, headers, content]
  2734. else:
  2735. return [handler.MOCK, 404, {'Connection': 'close'}, '']
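# Illustrative sketch (not part of the original source): BlackholeFilter answers blocked
# requests itself. CONNECT requests are stripped and re-filtered, plain http:// requests
# for image URLs get the 1x1 GIF above with long-lived cache headers, other http:// URLs
# get an empty 200, and anything else a 404, so ad slots fail quietly instead of hanging.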
  2736. class PACProxyHandler(SimpleProxyHandler):
  2737. """pac proxy handler"""
  2738. handler_filters = [PacFileFilter(), StaticFileFilter(), BlackholeFilter()]
  2739. def get_process_list():
  2740. import os
  2741. import glob
  2742. import ctypes
  2743. import collections
  2744. Process = collections.namedtuple('Process', 'pid name exe')
  2745. process_list = []
  2746. if os.name == 'nt':
  2747. PROCESS_QUERY_INFORMATION = 0x0400
  2748. PROCESS_VM_READ = 0x0010
  2749. lpidProcess = (ctypes.c_ulong * 1024)()
  2750. cb = ctypes.sizeof(lpidProcess)
  2751. cbNeeded = ctypes.c_ulong()
  2752. ctypes.windll.psapi.EnumProcesses(ctypes.byref(lpidProcess), cb, ctypes.byref(cbNeeded))
  2753. nReturned = cbNeeded.value/ctypes.sizeof(ctypes.c_ulong())
  2754. pidProcess = [i for i in lpidProcess][:nReturned]
  2755. has_queryimage = hasattr(ctypes.windll.kernel32, 'QueryFullProcessImageNameA')
  2756. for pid in pidProcess:
  2757. hProcess = ctypes.windll.kernel32.OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, 0, pid)
  2758. if hProcess:
  2759. modname = ctypes.create_string_buffer(2048)
  2760. count = ctypes.c_ulong(ctypes.sizeof(modname))
  2761. if has_queryimage:
  2762. ctypes.windll.kernel32.QueryFullProcessImageNameA(hProcess, 0, ctypes.byref(modname), ctypes.byref(count))
  2763. else:
  2764. ctypes.windll.psapi.GetModuleFileNameExA(hProcess, 0, ctypes.byref(modname), ctypes.byref(count))
  2765. exe = modname.value
  2766. name = os.path.basename(exe)
  2767. process_list.append(Process(pid=pid, name=name, exe=exe))
  2768. ctypes.windll.kernel32.CloseHandle(hProcess)
  2769. elif sys.platform.startswith('linux'):
  2770. for filename in glob.glob('/proc/[0-9]*/cmdline'):
  2771. pid = int(filename.split('/')[2])
  2772. exe_link = '/proc/%d/exe' % pid
  2773. if os.path.exists(exe_link):
  2774. exe = os.readlink(exe_link)
  2775. name = os.path.basename(exe)
  2776. process_list.append(Process(pid=pid, name=name, exe=exe))
  2777. else:
  2778. try:
  2779. import psutil
  2780. process_list = psutil.get_process_list()
  2781. except StandardError as e:
  2782. logging.exception('psutil.get_process_list() failed: %r', e)
  2783. return process_list
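# Illustrative usage sketch (not part of the original source); the process name below is
# only an example, not necessarily the real executable name.
#
#     names = set(p.name.lower() for p in get_process_list())
#     if any('360safe' in n for n in names):
#         pass  # warn about known-conflicting software, as pre_start() does below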
  2784. def pre_start():
  2785. if sys.platform == 'cygwin':
  2786. logging.info('cygwin is not officially supported, please continue at your own risk :)')
  2787. #sys.exit(-1)
  2788. elif os.name == 'posix':
  2789. try:
  2790. import resource
  2791. resource.setrlimit(resource.RLIMIT_NOFILE, (8192, -1))
  2792. except ValueError:
  2793. pass
  2794. elif os.name == 'nt':
  2795. import ctypes
  2796. ctypes.windll.kernel32.SetConsoleTitleW(u'GoAgent v%s' % __version__)
  2797. if not common.LISTEN_VISIBLE:
  2798. ctypes.windll.user32.ShowWindow(ctypes.windll.kernel32.GetConsoleWindow(), 0)
  2799. else:
  2800. ctypes.windll.user32.ShowWindow(ctypes.windll.kernel32.GetConsoleWindow(), 1)
  2801. if common.LOVE_ENABLE and random.randint(1, 100) <= 5:
  2802. title = ctypes.create_unicode_buffer(1024)
  2803. ctypes.windll.kernel32.GetConsoleTitleW(ctypes.byref(title), len(title)-1)
  2804. ctypes.windll.kernel32.SetConsoleTitleW('%s %s' % (title.value, random.choice(common.LOVE_TIP)))
  2805. blacklist = {'360safe': False,
  2806. 'QQProtect': False, }
  2807. softwares = [k for k, v in blacklist.items() if v]
  2808. if softwares:
  2809. tasklist = '\n'.join(x.name for x in get_process_list()).lower()
  2810. softwares = [x for x in softwares if x.lower() in tasklist]
  2811. if softwares:
2812. title = u'GoAgent Notice'
2813. error = u'Some security software (e.g. %s) may conflict with GoAgent and cause high CPU usage.\nIf this happens, please temporarily quit the security software and keep GoAgent running.' % ','.join(softwares)
  2814. ctypes.windll.user32.MessageBoxW(None, error, title, 0)
  2815. #sys.exit(0)
  2816. if os.path.isfile('/proc/cpuinfo'):
  2817. with open('/proc/cpuinfo', 'rb') as fp:
  2818. m = re.search(r'(?im)(BogoMIPS|cpu MHz)\s+:\s+([\d\.]+)', fp.read())
  2819. if m and float(m.group(2)) < 1000:
  2820. logging.warning("*NOTE*, Please set [gae]window=2 [gae]keepalive=1")
  2821. if GAEProxyHandler.max_window != common.GAE_WINDOW:
  2822. GAEProxyHandler.max_window = common.GAE_WINDOW
  2823. if common.GAE_KEEPALIVE and common.GAE_MODE == 'https':
  2824. GAEProxyHandler.ssl_connection_keepalive = True
  2825. if common.GAE_SSLVERSION:
  2826. GAEProxyHandler.ssl_version = getattr(ssl, 'PROTOCOL_%s' % common.GAE_SSLVERSION)
  2827. GAEProxyHandler.openssl_context = OpenSSL.SSL.Context(getattr(OpenSSL.SSL, '%s_METHOD' % common.GAE_SSLVERSION))
  2828. if common.GAE_APPIDS[0] == 'goagent':
2829. logging.critical('please edit %s to add your appid to the [gae] section!', common.CONFIG_FILENAME)
  2830. sys.exit(-1)
  2831. if common.GAE_MODE == 'http' and common.GAE_PASSWORD == '':
2832. logging.critical('to enable http mode, please set [gae]password = <your_pass> and [gae]options = rc4 in %r', common.CONFIG_FILENAME)
  2833. sys.exit(-1)
  2834. if common.GAE_TRANSPORT:
  2835. GAEProxyHandler.disable_transport_ssl = False
  2836. if common.GAE_REGIONS and not pygeoip:
  2837. logging.critical('to enable [gae]regions mode, you should install pygeoip')
  2838. sys.exit(-1)
  2839. if common.PAC_ENABLE:
  2840. pac_ip = ProxyUtil.get_listen_ip() if common.PAC_IP in ('', '::', '0.0.0.0') else common.PAC_IP
  2841. url = 'http://%s:%d/%s' % (pac_ip, common.PAC_PORT, common.PAC_FILE)
  2842. spawn_later(600, urllib2.build_opener(urllib2.ProxyHandler({})).open, url)
  2843. if not dnslib:
2844. logging.error('dnslib not found, please put dnslib-0.8.3.egg into %r!', os.path.dirname(os.path.abspath(__file__)))
  2845. sys.exit(-1)
  2846. if not common.DNS_ENABLE:
  2847. if not common.HTTP_DNS:
  2848. common.HTTP_DNS = common.DNS_SERVERS[:]
  2849. for dnsservers_ref in (common.HTTP_DNS, common.DNS_SERVERS):
  2850. any(dnsservers_ref.insert(0, x) for x in [y for y in get_dnsserver_list() if y not in dnsservers_ref])
  2851. AdvancedProxyHandler.dns_servers = common.HTTP_DNS
  2852. AdvancedProxyHandler.dns_blacklist = common.DNS_BLACKLIST
  2853. else:
  2854. AdvancedProxyHandler.dns_servers = common.HTTP_DNS or common.DNS_SERVERS
  2855. AdvancedProxyHandler.dns_blacklist = common.DNS_BLACKLIST
  2856. if not OpenSSL:
  2857. logging.warning('python-openssl not found, please install it!')
  2858. RangeFetch.threads = common.AUTORANGE_THREADS
  2859. RangeFetch.maxsize = common.AUTORANGE_MAXSIZE
  2860. RangeFetch.bufsize = common.AUTORANGE_BUFSIZE
  2861. RangeFetch.waitsize = common.AUTORANGE_WAITSIZE
  2862. if common.LISTEN_USERNAME and common.LISTEN_PASSWORD:
  2863. GAEProxyHandler.handler_filters.insert(0, AuthFilter(common.LISTEN_USERNAME, common.LISTEN_PASSWORD))
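# Illustrative sketch (not part of the original source): the checks in pre_start()
# correspond to [gae] entries in the configuration file (common.CONFIG_FILENAME),
# roughly like the following; the key names and values here are examples only.
#
#     [gae]
#     appid = your_appid_1|your_appid_2
#     password =
#     mode = https
#     window = 2
#     keepalive = 1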
  2864. def main():
  2865. global __file__
  2866. __file__ = os.path.abspath(__file__)
  2867. if os.path.islink(__file__):
  2868. __file__ = getattr(os, 'readlink', lambda x: x)(__file__)
  2869. os.chdir(os.path.dirname(os.path.abspath(__file__)))
  2870. logging.basicConfig(level=logging.DEBUG if common.LISTEN_DEBUGINFO else logging.INFO, format='%(levelname)s - %(asctime)s %(message)s', datefmt='[%b %d %H:%M:%S]')
  2871. pre_start()
  2872. CertUtil.check_ca()
  2873. sys.stderr.write(common.info())
  2874. uvent_enabled = 'uvent.loop' in sys.modules and isinstance(gevent.get_hub().loop, __import__('uvent').loop.UVLoop)
  2875. if common.PHP_ENABLE:
  2876. host, port = common.PHP_LISTEN.split(':')
  2877. HandlerClass = ((PHPProxyHandler, GreenForwardPHPProxyHandler) if not common.PROXY_ENABLE else (ProxyChainPHPProxyHandler, ProxyChainGreenForwardPHPProxyHandler))[uvent_enabled]
  2878. server = LocalProxyServer((host, int(port)), HandlerClass)
  2879. thread.start_new_thread(server.serve_forever, tuple())
  2880. if common.PAC_ENABLE:
  2881. server = LocalProxyServer((common.PAC_IP, common.PAC_PORT), PACProxyHandler)
  2882. thread.start_new_thread(server.serve_forever, tuple())
  2883. if common.DNS_ENABLE:
  2884. try:
  2885. sys.path += ['.']
  2886. from dnsproxy import DNSServer
  2887. host, port = common.DNS_LISTEN.split(':')
  2888. server = DNSServer((host, int(port)), dns_servers=common.DNS_SERVERS, dns_blacklist=common.DNS_BLACKLIST, dns_tcpover=common.DNS_TCPOVER)
  2889. thread.start_new_thread(server.serve_forever, tuple())
  2890. except ImportError:
  2891. logging.exception('GoAgent DNSServer requires dnslib and gevent 1.0')
  2892. sys.exit(-1)
  2893. HandlerClass = ((GAEProxyHandler, GreenForwardGAEProxyHandler) if not common.PROXY_ENABLE else (ProxyChainGAEProxyHandler, ProxyChainGreenForwardGAEProxyHandler))[uvent_enabled]
  2894. server = LocalProxyServer((common.LISTEN_IP, common.LISTEN_PORT), HandlerClass)
  2895. try:
  2896. server.serve_forever()
  2897. except SystemError as e:
  2898. if '(libev) select: ' in repr(e):
2899. logging.error('PLEASE START GOAGENT WITH uvent.bat')
  2900. sys.exit(-1)
  2901. if __name__ == '__main__':
  2902. main()