PageRenderTime 77ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 1ms

/goagent-local/proxy.py

https://github.com/greatagent3/esr
Python | 3490 lines | 3368 code | 64 blank | 58 comment | 188 complexity | 739c094e8b2c4504e77212b650d0a03a MD5 | raw file
  1. #!/usr/bin/env python
  2. # coding:utf-8
  3. # Based on GAppProxy 2.0.0 by Du XiaoGang <dugang.2008@gmail.com>
  4. # Based on WallProxy 0.4.0 by Hust Moon <www.ehust@gmail.com>
  5. # Contributor:
  6. # Phus Lu <phus.lu@gmail.com>
  7. # Hewig Xu <hewigovens@gmail.com>
  8. # Ayanamist Yang <ayanamist@gmail.com>
  9. # V.E.O <V.E.O@tom.com>
  10. # Max Lv <max.c.lv@gmail.com>
  11. # AlsoTang <alsotang@gmail.com>
  12. # Christopher Meng <i@cicku.me>
  13. # Yonsm Guo <YonsmGuo@gmail.com>
  14. # Parkman <cseparkman@gmail.com>
  15. # Ming Bai <mbbill@gmail.com>
  16. # Bin Yu <yubinlove1991@gmail.com>
  17. # lileixuan <lileixuan@gmail.com>
  18. # Cong Ding <cong@cding.org>
  19. # Zhang Youfu <zhangyoufu@gmail.com>
  20. # Lu Wei <luwei@barfoo>
  21. # Harmony Meow <harmony.meow@gmail.com>
  22. # logostream <logostream@gmail.com>
  23. # Rui Wang <isnowfy@gmail.com>
  24. # Wang Wei Qiang <wwqgtxx@gmail.com>
  25. # Felix Yan <felixonmars@gmail.com>
  26. # Sui Feng <suifeng.me@qq.com>
  27. # QXO <qxodream@gmail.com>
  28. # Geek An <geekan@foxmail.com>
  29. # Poly Rabbit <mcx_221@foxmail.com>
  30. # oxnz <yunxinyi@gmail.com>
  31. # Shusen Liu <liushusen.smart@gmail.com>
  32. # Yad Smood <y.s.inside@gmail.com>
  33. # Chen Shuang <cs0x7f@gmail.com>
  34. # cnfuyu <cnfuyu@gmail.com>
  35. # cuixin <steven.cuixin@gmail.com>
  36. # s2marine0 <s2marine0@gmail.com>
  37. # Toshio Xiang <snachx@gmail.com>
  38. # Bo Tian <dxmtb@163.com>
  39. # Virgil <variousvirgil@gmail.com>
  40. # hub01 <miaojiabumiao@yeah.net>
  41. # v3aqb <sgzz.cj@gmail.com>
  42. # Oling Cat <olingcat@gmail.com>
__version__ = '3.1.18'

import sys
import os
import glob

# Python 2 trick: reload() re-exposes sys.setdefaultencoding (deleted by
# site.py) so the process-wide default codec can be forced to UTF-8.
reload(sys).setdefaultencoding('UTF-8')
sys.dont_write_bytecode = True
# Make any bundled .egg archives sitting next to this script importable.
sys.path += glob.glob('%s/*.egg' % os.path.dirname(os.path.abspath(__file__)))

# Optional RSA keys supplied by a local key_config.py; absent keys fall back
# to None so the rest of the proxy can run unencrypted.
try:
    from key_config import __RSA_KEY__
except (ImportError, SystemError):
    __RSA_KEY__ = None
try:
    from key_config import __RANGEFETCH_RSA_KEY__
except (ImportError, SystemError):
    # Reuse the main key when no dedicated range-fetch key is configured.
    __RANGEFETCH_RSA_KEY__ = __RSA_KEY__

# gevent is preferred for cooperative sockets but optional.
try:
    import gevent
    import gevent.socket
    import gevent.server
    import gevent.queue
    import gevent.monkey
    gevent.monkey.patch_all(subprocess=True)
except ImportError:
    gevent = None
except TypeError:
    # gevent < 1.0 does not accept the `subprocess` keyword.
    gevent.monkey.patch_all()
    sys.stderr.write('\033[31m Warning: Please update gevent to the latest 1.0 version!\033[0m\n')

import errno
import time
import struct
import collections
import binascii
import zlib
import itertools
import re
import io
import fnmatch
import traceback
import random
import base64
import string
import hashlib
import uuid
import threading
import thread
import socket
import ssl
import select
import Queue
import SocketServer
import ConfigParser
import BaseHTTPServer
import httplib
import urllib
import urllib2
import urlparse

# Optional third-party helpers; related features degrade when missing.
try:
    import dnslib
except ImportError:
    dnslib = None
try:
    import OpenSSL
except ImportError:
    OpenSSL = None
try:
    import pygeoip
except ImportError:
    pygeoip = None

HAS_PYPY = hasattr(sys, 'pypy_version_info')
# One tuple covering every "network went wrong" exception this file handles.
NetWorkIOError = (socket.error, ssl.SSLError, OSError) if not OpenSSL else (socket.error, ssl.SSLError, OpenSSL.SSL.Error, OSError)
  113. class Logging(type(sys)):
  114. CRITICAL = 50
  115. FATAL = CRITICAL
  116. ERROR = 40
  117. WARNING = 30
  118. WARN = WARNING
  119. INFO = 20
  120. DEBUG = 10
  121. NOTSET = 0
  122. def __init__(self, *args, **kwargs):
  123. self.level = self.__class__.INFO
  124. self.__set_error_color = lambda: None
  125. self.__set_warning_color = lambda: None
  126. self.__set_debug_color = lambda: None
  127. self.__reset_color = lambda: None
  128. if hasattr(sys.stderr, 'isatty') and sys.stderr.isatty():
  129. if os.name == 'nt':
  130. import ctypes
  131. SetConsoleTextAttribute = ctypes.windll.kernel32.SetConsoleTextAttribute
  132. GetStdHandle = ctypes.windll.kernel32.GetStdHandle
  133. self.__set_error_color = lambda: SetConsoleTextAttribute(GetStdHandle(-11), 0x04)
  134. self.__set_warning_color = lambda: SetConsoleTextAttribute(GetStdHandle(-11), 0x06)
  135. self.__set_debug_color = lambda: SetConsoleTextAttribute(GetStdHandle(-11), 0x002)
  136. self.__reset_color = lambda: SetConsoleTextAttribute(GetStdHandle(-11), 0x07)
  137. elif os.name == 'posix':
  138. self.__set_error_color = lambda: sys.stderr.write('\033[31m')
  139. self.__set_warning_color = lambda: sys.stderr.write('\033[33m')
  140. self.__set_debug_color = lambda: sys.stderr.write('\033[32m')
  141. self.__reset_color = lambda: sys.stderr.write('\033[0m')
  142. @classmethod
  143. def getLogger(cls, *args, **kwargs):
  144. return cls(*args, **kwargs)
  145. def basicConfig(self, *args, **kwargs):
  146. self.level = int(kwargs.get('level', self.__class__.INFO))
  147. if self.level > self.__class__.DEBUG:
  148. self.debug = self.dummy
  149. def log(self, level, fmt, *args, **kwargs):
  150. sys.stderr.write('%s - [%s] %s\n' % (level, time.ctime()[4:-5], fmt % args))
  151. def dummy(self, *args, **kwargs):
  152. pass
  153. def debug(self, fmt, *args, **kwargs):
  154. self.__set_debug_color()
  155. self.log('DEBUG', fmt, *args, **kwargs)
  156. self.__reset_color()
  157. def info(self, fmt, *args, **kwargs):
  158. self.log('INFO', fmt, *args)
  159. def warning(self, fmt, *args, **kwargs):
  160. self.__set_warning_color()
  161. self.log('WARNING', fmt, *args, **kwargs)
  162. self.__reset_color()
  163. def warn(self, fmt, *args, **kwargs):
  164. self.warning(fmt, *args, **kwargs)
  165. def error(self, fmt, *args, **kwargs):
  166. self.__set_error_color()
  167. self.log('ERROR', fmt, *args, **kwargs)
  168. self.__reset_color()
  169. def exception(self, fmt, *args, **kwargs):
  170. self.error(fmt, *args, **kwargs)
  171. sys.stderr.write(traceback.format_exc() + '\n')
  172. def critical(self, fmt, *args, **kwargs):
  173. self.__set_error_color()
  174. self.log('CRITICAL', fmt, *args, **kwargs)
  175. self.__reset_color()
  176. logging = sys.modules['logging'] = Logging('logging')
  177. class LRUCache(object):
  178. """http://pypi.python.org/pypi/lru/"""
  179. def __init__(self, max_items=100):
  180. self.cache = {}
  181. self.key_order = []
  182. self.max_items = max_items
  183. def __setitem__(self, key, value):
  184. self.cache[key] = value
  185. self._mark(key)
  186. def __getitem__(self, key):
  187. value = self.cache[key]
  188. self._mark(key)
  189. return value
  190. def __contains__(self, key):
  191. return key in self.cache
  192. def _mark(self, key):
  193. if key in self.key_order:
  194. self.key_order.remove(key)
  195. self.key_order.insert(0, key)
  196. if len(self.key_order) > self.max_items:
  197. index = self.max_items // 2
  198. delitem = self.cache.__delitem__
  199. key_order = self.key_order
  200. any(delitem(key_order[x]) for x in xrange(index, len(key_order)))
  201. self.key_order = self.key_order[:index]
  202. def clear(self):
  203. self.cache = {}
  204. self.key_order = []
class CertUtil(object):
    """CertUtil module, based on mitmproxy.

    Maintains a local root CA (CA.crt) and mints per-host leaf certificates
    into `certs/` so the proxy can terminate TLS for arbitrary hosts.
    """
    ca_vendor = 'GoAgent'
    ca_keyfile = 'CA.crt'       # root CA: certificate + private key in one PEM file
    ca_certdir = 'certs'        # cache directory for generated leaf certs
    ca_lock = threading.Lock()  # serializes leaf-certificate generation

    @staticmethod
    def create_ca():
        """Generate a fresh self-signed 2048-bit RSA root CA; returns (PKey, X509)."""
        key = OpenSSL.crypto.PKey()
        key.generate_key(OpenSSL.crypto.TYPE_RSA, 2048)
        ca = OpenSSL.crypto.X509()
        ca.set_serial_number(0)
        ca.set_version(2)  # X.509 v3
        subj = ca.get_subject()
        subj.countryName = 'CN'
        subj.stateOrProvinceName = 'Internet'
        subj.localityName = 'Cernet'
        subj.organizationName = CertUtil.ca_vendor
        subj.organizationalUnitName = '%s Root' % CertUtil.ca_vendor
        subj.commonName = '%s CA' % CertUtil.ca_vendor
        ca.gmtime_adj_notBefore(0)
        ca.gmtime_adj_notAfter(24 * 60 * 60 * 3652)  # ~10 years
        ca.set_issuer(ca.get_subject())  # self-signed
        ca.set_pubkey(key)
        ca.add_extensions([
            OpenSSL.crypto.X509Extension(b'basicConstraints', True, b'CA:TRUE'),
            # OpenSSL.crypto.X509Extension(b'nsCertType', True, b'sslCA'),
            OpenSSL.crypto.X509Extension(b'extendedKeyUsage', True, b'serverAuth,clientAuth,emailProtection,timeStamping,msCodeInd,msCodeCom,msCTLSign,msSGC,msEFS,nsSGC'),
            OpenSSL.crypto.X509Extension(b'keyUsage', False, b'keyCertSign, cRLSign'),
            OpenSSL.crypto.X509Extension(b'subjectKeyIdentifier', False, b'hash', subject=ca), ])
        ca.sign(key, 'sha1')
        return key, ca

    @staticmethod
    def dump_ca():
        """Write a new root CA (certificate followed by key) to ca_keyfile."""
        key, ca = CertUtil.create_ca()
        with open(CertUtil.ca_keyfile, 'wb') as fp:
            fp.write(OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, ca))
            fp.write(OpenSSL.crypto.dump_privatekey(OpenSSL.crypto.FILETYPE_PEM, key))

    @staticmethod
    def _get_cert(commonname, sans=()):
        """Create a CA-signed leaf cert for `commonname`, write it to the
        cert dir, and return its file path.  Caller holds ca_lock."""
        # ca_keyfile holds both the CA cert and key, so one read serves both.
        with open(CertUtil.ca_keyfile, 'rb') as fp:
            content = fp.read()
        key = OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, content)
        ca = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, content)
        pkey = OpenSSL.crypto.PKey()
        pkey.generate_key(OpenSSL.crypto.TYPE_RSA, 2048)
        req = OpenSSL.crypto.X509Req()
        subj = req.get_subject()
        subj.countryName = 'CN'
        subj.stateOrProvinceName = 'Internet'
        subj.localityName = 'Cernet'
        subj.organizationalUnitName = '%s Branch' % CertUtil.ca_vendor
        if commonname[0] == '.':
            # Leading dot means "whole domain": issue a wildcard certificate.
            subj.commonName = '*' + commonname
            subj.organizationName = '*' + commonname
            sans = ['*'+commonname] + [x for x in sans if x != '*'+commonname]
        else:
            subj.commonName = commonname
            subj.organizationName = commonname
            sans = [commonname] + [x for x in sans if x != commonname]
        # subjectAltName deliberately left disabled (kept for reference):
        #req.add_extensions([OpenSSL.crypto.X509Extension(b'subjectAltName', True, ', '.join('DNS: %s' % x for x in sans)).encode()])
        req.set_pubkey(pkey)
        req.sign(pkey, 'sha1')
        cert = OpenSSL.crypto.X509()
        cert.set_version(2)
        try:
            # Deterministic serial derived from the hostname hash.
            cert.set_serial_number(int(hashlib.md5(commonname.encode('utf-8')).hexdigest(), 16))
        except OpenSSL.SSL.Error:
            cert.set_serial_number(int(time.time()*1000))
        cert.gmtime_adj_notBefore(0)
        cert.gmtime_adj_notAfter(60 * 60 * 24 * 3652)  # ~10 years
        cert.set_issuer(ca.get_subject())
        cert.set_subject(req.get_subject())
        cert.set_pubkey(req.get_pubkey())
        if commonname[0] == '.':
            sans = ['*'+commonname] + [s for s in sans if s != '*'+commonname]
        else:
            sans = [commonname] + [s for s in sans if s != commonname]
        #cert.add_extensions([OpenSSL.crypto.X509Extension(b'subjectAltName', True, ', '.join('DNS: %s' % x for x in sans))])
        cert.sign(key, 'sha1')
        certfile = os.path.join(CertUtil.ca_certdir, commonname + '.crt')
        with open(certfile, 'wb') as fp:
            fp.write(OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, cert))
            fp.write(OpenSSL.crypto.dump_privatekey(OpenSSL.crypto.FILETYPE_PEM, pkey))
        return certfile

    @staticmethod
    def get_cert(commonname, sans=()):
        """Return a cached or freshly generated cert file path for `commonname`."""
        # NOTE(review): Python 2 list comparison — collapses deep hostnames to
        # a wildcard parent domain unless the trailing labels are short
        # (heuristic for ccTLDs like '.com.cn'); confirm intent before changing.
        if commonname.count('.') >= 2 and [len(x) for x in reversed(commonname.split('.'))] > [2, 4]:
            commonname = '.'+commonname.partition('.')[-1]
        certfile = os.path.join(CertUtil.ca_certdir, commonname + '.crt')
        if os.path.exists(certfile):
            return certfile
        elif OpenSSL is None:
            # Without pyOpenSSL per-host certs cannot be minted; fall back to
            # serving the CA file itself.
            return CertUtil.ca_keyfile
        else:
            with CertUtil.ca_lock:
                if os.path.exists(certfile):  # lost the race: another thread made it
                    return certfile
                return CertUtil._get_cert(commonname, sans)

    @staticmethod
    def import_ca(certfile):
        """Install `certfile` into the OS trust store.

        Returns 0 on success (or already installed / nothing to do),
        -1 or a shell exit status on failure.
        """
        commonname = os.path.splitext(os.path.basename(certfile))[0]
        # Fallback fingerprint of the stock GoAgent CA; replaced below when
        # the certificate can actually be parsed.
        sha1digest = 'AB:70:2C:DF:18:EB:E8:B4:38:C5:28:69:CD:4A:5D:EF:48:B4:0E:33'
        if OpenSSL:
            try:
                with open(certfile, 'rb') as fp:
                    x509 = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, fp.read())
                    commonname = next(v.decode() for k, v in x509.get_subject().get_components() if k == b'O')
                    sha1digest = x509.digest('sha1')
            except StandardError as e:
                logging.error('load_certificate(certfile=%r) failed:%s', certfile, e)
        if sys.platform.startswith('win'):
            # Windows: add to the machine ROOT store via crypt32, skipping the
            # add when a cert with the same SHA-1 hash is already present.
            import ctypes
            with open(certfile, 'rb') as fp:
                certdata = fp.read()
                if certdata.startswith(b'-----'):
                    # Convert PEM body to raw DER for the crypt32 APIs.
                    begin = b'-----BEGIN CERTIFICATE-----'
                    end = b'-----END CERTIFICATE-----'
                    certdata = base64.b64decode(b''.join(certdata[certdata.find(begin)+len(begin):certdata.find(end)].strip().splitlines()))
                crypt32 = ctypes.WinDLL(b'crypt32.dll'.decode())
                store_handle = crypt32.CertOpenStore(10, 0, 0, 0x4000 | 0x20000, b'ROOT'.decode())
                if not store_handle:
                    return -1
                X509_ASN_ENCODING = 0x00000001
                CERT_FIND_HASH = 0x10000

                class CRYPT_HASH_BLOB(ctypes.Structure):
                    _fields_ = [('cbData', ctypes.c_ulong), ('pbData', ctypes.c_char_p)]

                crypt_hash = CRYPT_HASH_BLOB(20, binascii.a2b_hex(sha1digest.replace(':', '')))
                crypt_handle = crypt32.CertFindCertificateInStore(store_handle, X509_ASN_ENCODING, 0, CERT_FIND_HASH, ctypes.byref(crypt_hash), None)
                if crypt_handle:
                    # Already installed: nothing to do.
                    crypt32.CertFreeCertificateContext(crypt_handle)
                    return 0
                ret = crypt32.CertAddEncodedCertificateToStore(store_handle, 0x1, certdata, len(certdata), 4, None)
                crypt32.CertCloseStore(store_handle, 0)
                del crypt32
                return 0 if ret else -1
        elif sys.platform == 'darwin':
            # macOS: idempotent shell pipeline via the `security` tool.
            return os.system(('security find-certificate -a -c "%s" | grep "%s" >/dev/null || security add-trusted-cert -d -r trustRoot -k "/Library/Keychains/System.keychain" "%s"' % (commonname, commonname, certfile.decode('utf-8'))).encode('utf-8'))
        elif sys.platform.startswith('linux'):
            import platform
            platform_distname = platform.dist()[0]
            if platform_distname == 'Ubuntu':
                pemfile = "/etc/ssl/certs/%s.pem" % commonname
                new_certfile = "/usr/local/share/ca-certificates/%s.crt" % commonname
                if not os.path.exists(pemfile):
                    return os.system('cp "%s" "%s" && update-ca-certificates' % (certfile, new_certfile))
            elif any(os.path.isfile('%s/certutil' % x) for x in os.environ['PATH'].split(os.pathsep)):
                # Other distros: import into the NSS user database (Chrome etc.).
                return os.system('certutil -L -d sql:$HOME/.pki/nssdb | grep "%s" || certutil -d sql:$HOME/.pki/nssdb -A -t "C,," -n "%s" -i "%s"' % (commonname, commonname, certfile))
            else:
                logging.warning('please install *libnss3-tools* package to import GoAgent root ca')
        return 0

    @staticmethod
    def check_ca():
        """Ensure the root CA exists, is imported, and the certs dir is usable."""
        #Check CA exists
        capath = os.path.join(os.path.dirname(os.path.abspath(__file__)), CertUtil.ca_keyfile)
        certdir = os.path.join(os.path.dirname(os.path.abspath(__file__)), CertUtil.ca_certdir)
        if not os.path.exists(capath):
            if not OpenSSL:
                logging.critical('CA.key is not exist and OpenSSL is disabled, ABORT!')
                sys.exit(-1)
            # A regenerated CA invalidates every cached leaf certificate.
            if os.path.exists(certdir):
                if os.path.isdir(certdir):
                    any(os.remove(x) for x in glob.glob(certdir+'/*.crt')+glob.glob(certdir+'/.*.crt'))
                else:
                    os.remove(certdir)
                    os.mkdir(certdir)
            CertUtil.dump_ca()
        if glob.glob('%s/*.key' % CertUtil.ca_certdir):
            # Clean up stale split .key/.crt pairs left by older versions.
            for filename in glob.glob('%s/*.key' % CertUtil.ca_certdir):
                try:
                    os.remove(filename)
                    os.remove(os.path.splitext(filename)[0]+'.crt')
                except EnvironmentError:
                    pass
        #Check CA imported
        if CertUtil.import_ca(capath) != 0:
            logging.warning('install root certificate failed, Please run as administrator/root/sudo')
        #Check Certs Dir
        if not os.path.exists(certdir):
            os.makedirs(certdir)
class DetectMobileBrowser:
    """detect mobile function from http://detectmobilebrowsers.com"""
    # NOTE(review): these raw strings contain doubled backslashes (e.g.
    # r"bb\\d+", r"bada\\/"), which in a raw string match a literal backslash
    # rather than a digit class / slash.  This looks like an escaping artifact
    # from the upstream JavaScript snippet — confirm against
    # detectmobilebrowsers.com before changing, since most alternatives
    # (e.g. "android", "iphone") contain no backslashes and still match.
    # regex_match_a: full user-agent patterns.
    regex_match_a = re.compile(r"(android|bb\\d+|meego).+mobile|avantgo|bada\\/|blackberry|blazer|compal|elaine|fennec|hiptop|iemobile|ip(hone|od)|iris|kindle|lge |maemo|midp|mmp|mobile.+firefox|netfront|opera m(ob|in)i|palm( os)?|phone|p(ixi|re)\\/|plucker|pocket|psp|series(4|6)0|symbian|treo|up\\.(browser|link)|vodafone|wap|windows (ce|phone)|xda|xiino", re.I|re.M).search
    # regex_match_b: short vendor/model prefixes.
    regex_match_b = re.compile(r"1207|6310|6590|3gso|4thp|50[1-6]i|770s|802s|a wa|abac|ac(er|oo|s\\-)|ai(ko|rn)|al(av|ca|co)|amoi|an(ex|ny|yw)|aptu|ar(ch|go)|as(te|us)|attw|au(di|\\-m|r |s )|avan|be(ck|ll|nq)|bi(lb|rd)|bl(ac|az)|br(e|v)w|bumb|bw\\-(n|u)|c55\\/|capi|ccwa|cdm\\-|cell|chtm|cldc|cmd\\-|co(mp|nd)|craw|da(it|ll|ng)|dbte|dc\\-s|devi|dica|dmob|do(c|p)o|ds(12|\\-d)|el(49|ai)|em(l2|ul)|er(ic|k0)|esl8|ez([4-7]0|os|wa|ze)|fetc|fly(\\-|_)|g1 u|g560|gene|gf\\-5|g\\-mo|go(\\.w|od)|gr(ad|un)|haie|hcit|hd\\-(m|p|t)|hei\\-|hi(pt|ta)|hp( i|ip)|hs\\-c|ht(c(\\-| |_|a|g|p|s|t)|tp)|hu(aw|tc)|i\\-(20|go|ma)|i230|iac( |\\-|\\/)|ibro|idea|ig01|ikom|im1k|inno|ipaq|iris|ja(t|v)a|jbro|jemu|jigs|kddi|keji|kgt( |\\/)|klon|kpt |kwc\\-|kyo(c|k)|le(no|xi)|lg( g|\\/(k|l|u)|50|54|\\-[a-w])|libw|lynx|m1\\-w|m3ga|m50\\/|ma(te|ui|xo)|mc(01|21|ca)|m\\-cr|me(rc|ri)|mi(o8|oa|ts)|mmef|mo(01|02|bi|de|do|t(\\-| |o|v)|zz)|mt(50|p1|v )|mwbp|mywa|n10[0-2]|n20[2-3]|n30(0|2)|n50(0|2|5)|n7(0(0|1)|10)|ne((c|m)\\-|on|tf|wf|wg|wt)|nok(6|i)|nzph|o2im|op(ti|wv)|oran|owg1|p800|pan(a|d|t)|pdxg|pg(13|\\-([1-8]|c))|phil|pire|pl(ay|uc)|pn\\-2|po(ck|rt|se)|prox|psio|pt\\-g|qa\\-a|qc(07|12|21|32|60|\\-[2-7]|i\\-)|qtek|r380|r600|raks|rim9|ro(ve|zo)|s55\\/|sa(ge|ma|mm|ms|ny|va)|sc(01|h\\-|oo|p\\-)|sdk\\/|se(c(\\-|0|1)|47|mc|nd|ri)|sgh\\-|shar|sie(\\-|m)|sk\\-0|sl(45|id)|sm(al|ar|b3|it|t5)|so(ft|ny)|sp(01|h\\-|v\\-|v )|sy(01|mb)|t2(18|50)|t6(00|10|18)|ta(gt|lk)|tcl\\-|tdg\\-|tel(i|m)|tim\\-|t\\-mo|to(pl|sh)|ts(70|m\\-|m3|m5)|tx\\-9|up(\\.b|g1|si)|utst|v400|v750|veri|vi(rg|te)|vk(40|5[0-3]|\\-v)|vm40|voda|vulc|vx(52|53|60|61|70|80|81|83|85|98)|w3c(\\-| )|webc|whit|wi(g |nc|nw)|wmlb|wonu|x700|yas\\-|your|zeto|zte\\-", re.I|re.M).search

    @staticmethod
    def detect(user_agent):
        # Truthy (a match object) when either pattern finds a mobile token.
        return DetectMobileBrowser.regex_match_a(user_agent) or DetectMobileBrowser.regex_match_b(user_agent)
  392. class SSLConnection(object):
  393. """OpenSSL Connection Wapper"""
  394. def __init__(self, context, sock):
  395. self._context = context
  396. self._sock = sock
  397. self._connection = OpenSSL.SSL.Connection(context, sock)
  398. self._makefile_refs = 0
  399. def __getattr__(self, attr):
  400. if attr not in ('_context', '_sock', '_connection', '_makefile_refs'):
  401. return getattr(self._connection, attr)
  402. def __wait_sock_io(self, io_func, *args, **kwargs):
  403. timeout = self._sock.gettimeout() or 0.1
  404. fd = self._sock.fileno()
  405. while True:
  406. try:
  407. return io_func(*args, **kwargs)
  408. except (OpenSSL.SSL.WantReadError, OpenSSL.SSL.WantX509LookupError):
  409. sys.exc_clear()
  410. _, _, errors = select.select([fd], [], [fd], timeout)
  411. if errors:
  412. break
  413. except OpenSSL.SSL.WantWriteError:
  414. sys.exc_clear()
  415. _, _, errors = select.select([], [fd], [fd], timeout)
  416. if errors:
  417. break
  418. def accept(self):
  419. sock, addr = self._sock.accept()
  420. client = OpenSSL.SSL.Connection(sock._context, sock)
  421. return client, addr
  422. def do_handshake(self):
  423. return self.__wait_sock_io(self._connection.do_handshake)
  424. def connect(self, *args, **kwargs):
  425. return self.__wait_sock_io(self._connection.connect, *args, **kwargs)
  426. def send(self, data, flags=0):
  427. try:
  428. return self.__wait_sock_io(self._connection.send, data, flags)
  429. except OpenSSL.SSL.SysCallError as e:
  430. if e[0] == -1 and not data:
  431. # errors when writing empty strings are expected and can be ignored
  432. return 0
  433. raise
  434. def recv(self, bufsiz, flags=0):
  435. pending = self._connection.pending()
  436. if pending:
  437. return self._connection.recv(min(pending, bufsiz))
  438. try:
  439. return self.__wait_sock_io(self._connection.recv, bufsiz, flags)
  440. except OpenSSL.SSL.ZeroReturnError:
  441. return ''
  442. def read(self, bufsiz, flags=0):
  443. return self.recv(bufsiz, flags)
  444. def write(self, buf, flags=0):
  445. return self.sendall(buf, flags)
  446. def close(self):
  447. if self._makefile_refs < 1:
  448. self._connection = None
  449. if self._sock:
  450. socket.socket.close(self._sock)
  451. else:
  452. self._makefile_refs -= 1
  453. def makefile(self, mode='r', bufsize=-1):
  454. self._makefile_refs += 1
  455. return socket._fileobject(self, mode, bufsize, close=True)
  456. @staticmethod
  457. def context_builder(ssl_version='SSLv23', ca_certs=None, cipher_suites=('ALL', '!aNULL', '!eNULL')):
  458. protocol_version = getattr(OpenSSL.SSL, '%s_METHOD' % ssl_version)
  459. ssl_context = OpenSSL.SSL.Context(protocol_version)
  460. if ca_certs:
  461. ssl_context.load_verify_locations(os.path.abspath(ca_certs))
  462. ssl_context.set_verify(OpenSSL.SSL.VERIFY_PEER, lambda c, x, e, d, ok: ok)
  463. else:
  464. ssl_context.set_verify(OpenSSL.SSL.VERIFY_NONE, lambda c, x, e, d, ok: ok)
  465. ssl_context.set_cipher_list(':'.join(cipher_suites))
  466. if hasattr(OpenSSL.SSL, 'SESS_CACHE_BOTH'):
  467. ssl_context.set_session_cache_mode(OpenSSL.SSL.SESS_CACHE_BOTH)
  468. return ssl_context
  469. class ProxyUtil(object):
  470. """ProxyUtil module, based on urllib2"""
  471. @staticmethod
  472. def parse_proxy(proxy):
  473. return urllib2._parse_proxy(proxy)
  474. @staticmethod
  475. def get_system_proxy():
  476. proxies = urllib2.getproxies()
  477. return proxies.get('https') or proxies.get('http') or {}
  478. @staticmethod
  479. def get_listen_ip():
  480. listen_ip = '127.0.0.1'
  481. sock = None
  482. try:
  483. sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
  484. sock.connect(('8.8.8.8', 53))
  485. listen_ip = sock.getsockname()[0]
  486. except socket.error:
  487. pass
  488. finally:
  489. if sock:
  490. sock.close()
  491. return listen_ip
  492. def inflate(data):
  493. return zlib.decompress(data, -zlib.MAX_WBITS)
  494. def deflate(data):
  495. return zlib.compress(data)[2:-4]
  496. def parse_hostport(host, default_port=80):
  497. m = re.match(r'(.+)[#](\d+)$', host)
  498. if m:
  499. return m.group(1).strip('[]'), int(m.group(2))
  500. else:
  501. return host.strip('[]'), default_port
def dnslib_resolve_over_udp(query, dnsservers, timeout, **kwargs):
    """
    http://gfwrev.blogspot.com/2009/11/gfwdns.html
    http://zh.wikipedia.org/wiki/%E5%9F%9F%E5%90%8D%E6%9C%8D%E5%8A%A1%E5%99%A8%E7%BC%93%E5%AD%98%E6%B1%A1%E6%9F%93
    http://support.microsoft.com/kb/241352

    Resolve `query` over UDP against `dnsservers`, ignoring replies whose
    answer IPs appear in kwargs['blacklist'] (spoofed-reply filtering; see
    the links above).

    query      -- hostname string or a prebuilt dnslib.DNSRecord
    dnsservers -- iterable of 'ip' or 'ip#port' strings (IPv4/IPv6 mixed)
    timeout    -- overall deadline in seconds across all retry rounds
    kwargs     -- blacklist: IPs marking a reply as poisoned;
                  turstservers (sic, kwarg name kept for compatibility):
                  servers whose error replies (rcode != 0) are trusted.
    Returns the first acceptable dnslib.DNSRecord; raises socket.gaierror
    when none arrives before the deadline.
    """
    if not isinstance(query, (basestring, dnslib.DNSRecord)):
        raise TypeError('query argument requires string/DNSRecord')
    blacklist = kwargs.get('blacklist', ())
    turstservers = kwargs.get('turstservers', ())
    dns_v4_servers = [x for x in dnsservers if ':' not in x]
    dns_v6_servers = [x for x in dnsservers if ':' in x]
    sock_v4 = sock_v6 = None
    socks = []
    if dns_v4_servers:
        sock_v4 = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        socks.append(sock_v4)
    if dns_v6_servers:
        sock_v6 = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM)
        socks.append(sock_v6)
    timeout_at = time.time() + timeout
    try:
        # Up to 4 send/collect rounds before giving up.
        for _ in xrange(4):
            try:
                for dnsserver in dns_v4_servers:
                    if isinstance(query, basestring):
                        query = dnslib.DNSRecord(q=dnslib.DNSQuestion(query))
                    query_data = query.pack()
                    sock_v4.sendto(query_data, parse_hostport(dnsserver, 53))
                for dnsserver in dns_v6_servers:
                    if isinstance(query, basestring):
                        # IPv6 servers are asked for AAAA records.
                        query = dnslib.DNSRecord(q=dnslib.DNSQuestion(query, qtype=dnslib.QTYPE.AAAA))
                    query_data = query.pack()
                    sock_v6.sendto(query_data, parse_hostport(dnsserver, 53))
                while time.time() < timeout_at:
                    ins, _, _ = select.select(socks, [], [], 0.1)
                    for sock in ins:
                        reply_data, reply_address = sock.recvfrom(512)
                        reply_server = reply_address[0]
                        record = dnslib.DNSRecord.parse(reply_data)
                        # rtype 1=A, 28=AAAA, 255=ANY
                        iplist = [str(x.rdata) for x in record.rr if x.rtype in (1, 28, 255)]
                        if any(x in blacklist for x in iplist):
                            # Poisoned/spoofed answer: drop it, keep listening.
                            logging.debug('query=%r dnsservers=%r record bad iplist=%r', query, dnsservers, iplist)
                        elif record.header.rcode and not iplist and reply_server in turstservers:
                            # Trusted server reporting an error (e.g. NXDOMAIN).
                            logging.info('query=%r trust reply_server=%r record rcode=%s', query, reply_server, record.header.rcode)
                            return record
                        elif iplist:
                            logging.debug('query=%r reply_server=%r record iplist=%s', query, reply_server, iplist)
                            return record
                        else:
                            logging.debug('query=%r reply_server=%r record null iplist=%s', query, reply_server, iplist)
                            continue
            except socket.error as e:
                logging.warning('handle dns query=%s socket: %r', query, e)
        raise socket.gaierror(11004, 'getaddrinfo %r from %r failed' % (query, dnsservers))
    finally:
        for sock in socks:
            sock.close()
  560. def dnslib_resolve_over_tcp(query, dnsservers, timeout, **kwargs):
  561. """dns query over tcp"""
  562. if not isinstance(query, (basestring, dnslib.DNSRecord)):
  563. raise TypeError('query argument requires string/DNSRecord')
  564. blacklist = kwargs.get('blacklist', ())
  565. def do_resolve(query, dnsserver, timeout, queobj):
  566. if isinstance(query, basestring):
  567. qtype = dnslib.QTYPE.AAAA if ':' in dnsserver else dnslib.QTYPE.A
  568. query = dnslib.DNSRecord(q=dnslib.DNSQuestion(query, qtype=qtype))
  569. query_data = query.pack()
  570. sock_family = socket.AF_INET6 if ':' in dnsserver else socket.AF_INET
  571. sock = socket.socket(sock_family)
  572. rfile = None
  573. try:
  574. sock.settimeout(timeout or None)
  575. sock.connect(parse_hostport(dnsserver, 53))
  576. sock.send(struct.pack('>h', len(query_data)) + query_data)
  577. rfile = sock.makefile('r', 1024)
  578. reply_data_length = rfile.read(2)
  579. if len(reply_data_length) < 2:
  580. raise socket.gaierror(11004, 'getaddrinfo %r from %r failed' % (query, dnsserver))
  581. reply_data = rfile.read(struct.unpack('>h', reply_data_length)[0])
  582. record = dnslib.DNSRecord.parse(reply_data)
  583. iplist = [str(x.rdata) for x in record.rr if x.rtype in (1, 28, 255)]
  584. if any(x in blacklist for x in iplist):
  585. logging.debug('query=%r dnsserver=%r record bad iplist=%r', query, dnsserver, iplist)
  586. raise socket.gaierror(11004, 'getaddrinfo %r from %r failed' % (query, dnsserver))
  587. else:
  588. logging.debug('query=%r dnsserver=%r record iplist=%s', query, dnsserver, iplist)
  589. queobj.put(record)
  590. except socket.error as e:
  591. logging.debug('query=%r dnsserver=%r failed %r', query, dnsserver, e)
  592. queobj.put(e)
  593. finally:
  594. if rfile:
  595. rfile.close()
  596. sock.close()
  597. queobj = Queue.Queue()
  598. for dnsserver in dnsservers:
  599. thread.start_new_thread(do_resolve, (query, dnsserver, timeout, queobj))
  600. for i in range(len(dnsservers)):
  601. try:
  602. result = queobj.get(timeout)
  603. except Queue.Empty:
  604. raise socket.gaierror(11004, 'getaddrinfo %r from %r failed' % (query, dnsservers))
  605. if result and not isinstance(result, Exception):
  606. return result
  607. elif i == len(dnsservers) - 1:
  608. logging.debug('dnslib_resolve_over_tcp %r with %s return %r', query, dnsservers, result)
  609. raise socket.gaierror(11004, 'getaddrinfo %r from %r failed' % (query, dnsservers))
  610. def dnslib_record2iplist(record):
  611. """convert dnslib.DNSRecord to iplist"""
  612. assert isinstance(record, dnslib.DNSRecord)
  613. iplist = [x for x in (str(r.rdata) for r in record.rr) if re.match(r'^\d+\.\d+\.\d+\.\d+$', x) or ':' in x]
  614. return iplist
def get_dnsserver_list():
    """Return the system's configured DNS server IPs (best effort)."""
    if os.name == 'nt':
        # Windows: query the resolver configuration via dnsapi.DnsQueryConfig.
        import ctypes
        import ctypes.wintypes
        DNS_CONFIG_DNS_SERVER_LIST = 6
        buf = ctypes.create_string_buffer(2048)
        ctypes.windll.dnsapi.DnsQueryConfig(DNS_CONFIG_DNS_SERVER_LIST, 0, None, None, ctypes.byref(buf), ctypes.byref(ctypes.wintypes.DWORD(len(buf))))
        # Buffer layout: 4-byte count, then packed 4-byte IPv4 addresses.
        ipcount = struct.unpack('I', buf[0:4])[0]
        iplist = [socket.inet_ntoa(buf[i:i+4]) for i in xrange(4, ipcount*4+4, 4)]
        return iplist
    elif os.path.isfile('/etc/resolv.conf'):
        # POSIX: scrape nameserver lines from resolv.conf.
        with open('/etc/resolv.conf', 'rb') as fp:
            return re.findall(r'(?m)^nameserver\s+(\S+)', fp.read())
    else:
        logging.warning("get_dnsserver_list failed: unsupport platform '%s-%s'", sys.platform, os.name)
        return []
  631. def spawn_later(seconds, target, *args, **kwargs):
  632. def wrap(*args, **kwargs):
  633. time.sleep(seconds)
  634. return target(*args, **kwargs)
  635. return thread.start_new_thread(wrap, args, kwargs)
  636. def spawn_period(seconds, target, *args, **kwargs):
  637. def wrap(*args, **kwargs):
  638. try:
  639. time.sleep(seconds)
  640. target(*args, **kwargs)
  641. except StandardError as e:
  642. logging.warning('%r(%s, %s) error: %r', target, args, kwargs, e)
  643. return thread.start_new_thread(wrap, args, kwargs)
  644. def is_clienthello(data):
  645. if len(data) < 20:
  646. return False
  647. if data.startswith('\x16\x03'):
  648. # TLSv12/TLSv11/TLSv1/SSLv3
  649. length, = struct.unpack('>h', data[3:5])
  650. return len(data) == 5 + length
  651. elif data[0] == '\x80' and data[2:4] == '\x01\x03':
  652. # SSLv23
  653. return len(data) == 2 + ord(data[1])
  654. else:
  655. return False
  656. def is_google_ip(ipaddr):
  657. return ipaddr.startswith(('173.194.', '207.126.', '209.85.', '216.239.', '64.18.', '64.233.', '66.102.', '66.249.', '72.14.', '74.125.'))
def extract_sni_name(packet):
    """Pull the SNI hostname (extension type 0) out of a raw TLS ClientHello
    record; returns None implicitly when absent or not a TLS handshake."""
    if packet.startswith('\x16\x03'):
        stream = io.BytesIO(packet)
        # Skip 0x2b fixed bytes up to the session-id length byte
        # (record header + handshake header + client version + random).
        stream.read(0x2b)
        session_id_length = ord(stream.read(1))
        stream.read(session_id_length)
        cipher_suites_length, = struct.unpack('>h', stream.read(2))
        # +2 skips two further bytes after the cipher suites — presumably the
        # compression-methods vector; confirm against RFC 5246 before changing.
        stream.read(cipher_suites_length+2)
        extensions_length, = struct.unpack('>h', stream.read(2))
        while True:
            # Each extension: 2-byte type, 2-byte length, then payload.
            data = stream.read(2)
            if not data:
                break
            etype, = struct.unpack('>h', data)
            elen, = struct.unpack('>h', stream.read(2))
            edata = stream.read(elen)
            if etype == 0:
                # server_name extension: skip the 5-byte list/type/length
                # preamble; the remainder is the hostname.
                server_name = edata[5:]
                return server_name
class URLFetch(object):
    """URLFetch for gae/php fetchservers"""
    # Hop-by-hop / proxy-specific headers never forwarded to the fetchserver.
    skip_headers = frozenset(['Vary', 'Via', 'X-Forwarded-For', 'Proxy-Authorization', 'Proxy-Connection', 'Upgrade', 'X-Chrome-Variations', 'Connection', 'Cache-Control'])

    def __init__(self, handler, fetchserver):
        # *fetchserver* is the URL of the remote GAE/PHP relay application;
        # the handler supplies the low-level HTTP transport.
        assert isinstance(fetchserver, basestring) and callable(create_http_request)
        self.handler = handler
        self.fetchserver = fetchserver
        self.create_http_request = handler.create_http_request

    def fetch(self, method, url, headers, body, timeout, **kwargs):
        """Relay one request through the fetchserver and return the response.

        Dispatches on the fetchserver URL: appspot hosts speak the GAE wire
        format (response headers already decoded, app_header_parsed=True),
        anything else the PHP format (headers still raw).
        """
        if '.appspot.com/' in self.fetchserver:
            response = self.__gae_fetch(method, url, headers, body, timeout, **kwargs)
            response.app_header_parsed = True
        else:
            response = self.__php_fetch(method, url, headers, body, timeout, **kwargs)
            response.app_header_parsed = False
        return response

    def __gae_fetch(self, method, url, headers, body, timeout, **kwargs):
        """Encode the request into the GAE relay wire format and decode the reply.

        The real request line and headers are serialized into a ``G-...``
        metadata blob; the reply body starts with a 4-byte
        (status, headers_length) prefix followed by deflated headers.
        """
        # Optional RC4 encryption helper; identity when no key is given.
        rc4crypt = lambda s, k: RC4Cipher(k).encrypt(s) if k else s
        options = ''
        rsa_key = None
        if 'options' in kwargs:
            options = kwargs.get('options')
            del kwargs['options']
        if 'rsa_key' in kwargs:
            rsa_key = kwargs.get('rsa_key')
            del kwargs['rsa_key']
        if isinstance(body, basestring) and body:
            # Deflate the body when that actually shrinks it (<10MB only).
            if len(body) < 10 * 1024 * 1024 and 'Content-Encoding' not in headers:
                zbody = deflate(body)
                if len(zbody) < len(body):
                    body = zbody
                    headers['Content-Encoding'] = 'deflate'
            headers['Content-Length'] = str(len(body))
        # GAE donot allow set `Host` header
        if 'Host' in headers:
            del headers['Host']
        # Serialize method/url/extra options plus surviving headers as metadata.
        metadata = 'G-Method:%s\nG-Url:%s\n%s' % (method, url, ''.join('G-%s:%s\n' % (k, v) for k, v in kwargs.items() if v))
        skip_headers = self.skip_headers
        metadata += ''.join('%s:%s\n' % (k.title(), v) for k, v in headers.items() if k not in skip_headers)
        # prepare GAE request
        request_fetchserver = self.fetchserver
        request_method = 'POST'
        request_headers = {}
        if 'rsa' in options and rsa_key:
            # Generate fresh per-request RC4 keys and ship them RSA-encrypted.
            from Crypto.PublicKey import RSA
            from Crypto.Cipher import PKCS1_OAEP
            from Crypto.Random.random import StrongRandom
            rsakey = RSA.importKey(rsa_key.strip())
            rsakey = PKCS1_OAEP.new(rsakey)
            crypt_cookie_key = base64.b64encode(read_random_bits(256))
            crypt_payload_key = base64.b64encode(read_random_bits(256))
            crypt_response_msg_key = base64.b64encode(read_random_bits(256))
            crypt_response_fp_key = base64.b64encode(read_random_bits(256))
            crypt_keys = crypt_cookie_key + '|' + crypt_payload_key + '|' + crypt_response_msg_key + '|' + crypt_response_fp_key
            request_headers['X-GOA-KEYS'] = base64.b64encode(rsakey.encrypt(crypt_keys))
        else:
            # Fall back to the static shared password for every key role.
            crypt_cookie_key = kwargs.get('password')
            crypt_payload_key = kwargs.get('password')
            crypt_response_msg_key = kwargs.get('password')
            crypt_response_fp_key = kwargs.get('password')
        if common.GAE_OBFUSCATE:
            # Obfuscated mode: disguise the POST as a GET for a random .gif,
            # smuggling metadata/body in X-GOA-PS1/PS2 headers.
            # if 'rc4' in options:
            #     request_headers['X-GOA-Options'] = 'rc4'
            #     cookie = base64.b64encode(rc4crypt(zlib.compress(metadata)[2:-4], crypt_cookie_key)).strip()
            #     body = rc4crypt(body, crypt_payload_key)
            # else:
            #     cookie = base64.b64encode(zlib.compress(metadata)[2:-4]).strip()
            # request_headers['Cookie'] = cookie
            # if body:
            #     request_headers['Content-Length'] = str(len(body))
            # else:
            #     request_method = 'GET'
            request_method = 'GET'
            request_fetchserver += '/ps/%s.gif' % uuid.uuid1()
            request_headers['X-GOA-PS1'] = base64.b64encode(deflate(metadata)).strip()
            if body:
                request_headers['X-GOA-PS2'] = base64.b64encode(deflate(body)).strip()
                body = ''
            if common.GAE_PAGESPEED:
                # Route via the PageSpeed front-end host.
                request_fetchserver = re.sub(r'^(\w+://)', r'\g<1>1-ps.googleusercontent.com/h/', request_fetchserver)
        else:
            # Plain mode: POST [2-byte metadata length][metadata][body].
            metadata = deflate(metadata)
            body = '%s%s%s' % (struct.pack('!h', len(metadata)), metadata, body)
            if 'rc4' in options:
                request_headers['X-GOA-Options'] = 'rc4'
                body = rc4crypt(body, crypt_payload_key)
            request_headers['Content-Length'] = str(len(body))
        # post data
        need_crlf = 0 if common.GAE_MODE == 'https' else 1
        need_validate = common.GAE_VALIDATE
        cache_key = '%s:%d' % (common.HOST_POSTFIX_MAP['.appspot.com'], 443 if common.GAE_MODE == 'https' else 80)
        response = self.create_http_request(request_method, request_fetchserver, request_headers, body, timeout, crlf=need_crlf, validate=need_validate, cache_key=cache_key)
        # app_status is the relay's own HTTP status; response.status is later
        # overwritten with the origin server's status decoded from the body.
        response.app_status = response.status
        response.app_options = response.getheader('X-GOA-Options', '')
        if response.status != 200:
            return response
        # First 4 bytes: real status + length of the deflated header block.
        data = response.read(4)
        if len(data) < 4:
            response.status = 502
            response.fp = io.BytesIO(b'connection aborted. too short leadbyte data=' + data)
            response.read = response.fp.read
            return response
        response.status, headers_length = struct.unpack('!hh', data)
        data = response.read(headers_length)
        if len(data) < headers_length:
            response.status = 502
            response.fp = io.BytesIO(b'connection aborted. too short headers data=' + data)
            response.read = response.fp.read
            return response
        if 'rc4' not in response.app_options:
            response.msg = httplib.HTTPMessage(io.BytesIO(inflate(data)))
        else:
            # Decrypt headers, then wrap the body stream for on-the-fly RC4.
            response.msg = httplib.HTTPMessage(io.BytesIO(inflate(rc4crypt(data, crypt_response_msg_key))))
            if crypt_response_fp_key and response.fp:
                response.fp = CipherFileObject(response.fp, RC4Cipher(crypt_response_fp_key))
        gae_appid = urlparse.urlsplit(self.fetchserver).netloc.split('.')[-3]
        if response.status == 206:
            # Partial content is frequent during range-fetch; keep it at debug.
            logging.debug('%s "GAE %s %s %s" %s %s %s %s', self.handler.address_string(), method, url, self.handler.protocol_version, gae_appid, options, response.status, response.getheader('Content-Length', '-'))
        else:
            logging.info('%s "GAE %s %s %s" %s %s %s %s', self.handler.address_string(), method, url, self.handler.protocol_version, gae_appid, options, response.status, response.getheader('Content-Length', '-'))
        return response

    def __php_fetch(self, method, url, headers, body, timeout, **kwargs):
        """Encode the request for a PHP relay: one POST whose body is
        [2-byte metadata length][deflated metadata][body]."""
        if body:
            if len(body) < 10 * 1024 * 1024 and 'Content-Encoding' not in headers:
                zbody = deflate(body)
                if len(zbody) < len(body):
                    body = zbody
                    headers['Content-Encoding'] = 'deflate'
            headers['Content-Length'] = str(len(body))
        skip_headers = self.skip_headers
        metadata = 'G-Method:%s\nG-Url:%s\n%s%s' % (method, url, ''.join('G-%s:%s\n' % (k, v) for k, v in kwargs.items() if v), ''.join('%s:%s\n' % (k, v) for k, v in headers.items() if k not in skip_headers))
        metadata = deflate(metadata)
        app_body = b''.join((struct.pack('!h', len(metadata)), metadata, body))
        app_headers = {'Content-Length': len(app_body), 'Content-Type': 'application/octet-stream'}
        # Random query string defeats intermediary caches.
        fetchserver = '%s?%s' % (self.fetchserver, random.random())
        crlf = 0
        cache_key = '%s//:%s' % urlparse.urlsplit(fetchserver)[:2]
        response = self.create_http_request('POST', fetchserver, app_headers, app_body, timeout, crlf=crlf, cache_key=cache_key)
        if not response:
            raise socket.error(errno.ECONNRESET, 'urlfetch %r return None' % url)
        if response.status >= 400:
            return response
        response.app_status = response.status
        # image/gif marks an XOR-obfuscated payload when a password is set.
        need_decrypt = kwargs.get('password') and response.app_status == 200 and response.getheader('Content-Type', '') == 'image/gif' and response.fp
        if need_decrypt:
            response.fp = CipherFileObject(response.fp, XORCipher(kwargs['password'][0]))
        return response
  825. class BaseProxyHandlerFilter(object):
  826. """base proxy handler filter"""
  827. def filter(self, handler):
  828. raise NotImplementedError
  829. class SimpleProxyHandlerFilter(BaseProxyHandlerFilter):
  830. """simple proxy handler filter"""
  831. def filter(self, handler):
  832. if handler.command == 'CONNECT':
  833. return [handler.FORWARD, handler.host, handler.port, handler.connect_timeout]
  834. else:
  835. return [handler.DIRECT, {}]
  836. class AuthFilter(BaseProxyHandlerFilter):
  837. """authorization filter"""
  838. auth_info = "Proxy authentication required"""
  839. white_list = set(['127.0.0.1'])
  840. def __init__(self, username, password):
  841. self.username = username
  842. self.password = password
  843. def check_auth_header(self, auth_header):
  844. method, _, auth_data = auth_header.partition(' ')
  845. if method == 'Basic':
  846. username, _, password = base64.b64decode(auth_data).partition(':')
  847. if username == self.username and password == self.password:
  848. return True
  849. return False
  850. def filter(self, handler):
  851. if self.white_list and handler.client_address[0] in self.white_list:
  852. return None
  853. auth_header = handler.headers.get('Proxy-Authorization') or getattr(handler, 'auth_header', None)
  854. if auth_header and self.check_auth_header(auth_header):
  855. handler.auth_header = auth_header
  856. else:
  857. headers = {'Access-Control-Allow-Origin': '*',
  858. 'Proxy-Authenticate': 'Basic realm="%s"' % self.auth_info,
  859. 'Content-Length': '0',
  860. 'Connection': 'keep-alive'}
  861. return [handler.MOCK, 407, headers, '']
class SimpleProxyHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    """SimpleProxyHandler for GoAgent 3.x"""
    protocol_version = 'HTTP/1.1'
    ssl_version = ssl.PROTOCOL_SSLv23
    # When False, handle_one_request() sniffs for a TLS ClientHello on the
    # plain port and transparently upgrades the transport to SSL.
    disable_transport_ssl = True
    scheme = 'http'
    # Hop-by-hop / proxy-specific headers never forwarded upstream.
    skip_headers = frozenset(['Vary', 'Via', 'X-Forwarded-For', 'Proxy-Authorization', 'Proxy-Connection', 'Upgrade', 'X-Chrome-Variations', 'Connection', 'Cache-Control'])
    bufsize = 256 * 1024
    # Idle timeout (seconds) for forwarded tunnels.
    max_timeout = 4
    connect_timeout = 2
    # Serializes the one-time first_run() initialization across threads.
    first_run_lock = threading.Lock()
    # Filters consulted in order by do_METHOD(); first non-None action wins.
    handler_filters = [SimpleProxyHandlerFilter()]
    # Per-connection filter installed by STRIP(), checked before handler_filters.
    sticky_filter = None

    def finish(self):
        """make python2 BaseHTTPRequestHandler happy"""
        try:
            BaseHTTPServer.BaseHTTPRequestHandler.finish(self)
        except NetWorkIOError as e:
            # Client already disconnected; nothing left to flush.
            if e[0] not in (errno.ECONNABORTED, errno.ECONNRESET, errno.EPIPE):
                raise

    def address_string(self):
        # "ip:port" of the client, skipping the base class's reverse DNS.
        return '%s:%s' % self.client_address[:2]

    def send_response(self, code, message=None):
        # Like the base implementation but without Server/Date headers.
        if message is None:
            if code in self.responses:
                message = self.responses[code][0]
            else:
                message = ''
        if self.request_version != 'HTTP/0.9':
            self.wfile.write('%s %d %s\r\n' % (self.protocol_version, code, message))

    def send_header(self, keyword, value):
        """Send a MIME header."""
        base_send_header = BaseHTTPServer.BaseHTTPRequestHandler.send_header
        keyword = keyword.title()
        if keyword == 'Set-Cookie':
            # httplib folds multiple Set-Cookie headers into one comma-joined
            # value; split them back into individual header lines.
            for cookie in re.split(r', (?=[^ =]+(?:=|$))', value):
                base_send_header(self, keyword, cookie)
        elif keyword == 'Content-Disposition' and '"' not in value:
            # Quote the filename so user agents parse it reliably.
            value = re.sub(r'filename=([^"\']+)', 'filename="\\1"', value)
            base_send_header(self, keyword, value)
        else:
            base_send_header(self, keyword, value)

    def setup(self):
        # Lazily run first_run() exactly once for the whole class, then
        # rebind setup/do_* so later requests take the fast path.
        if isinstance(self.__class__.first_run, collections.Callable):
            try:
                with self.__class__.first_run_lock:
                    if isinstance(self.__class__.first_run, collections.Callable):
                        self.first_run()
                        self.__class__.first_run = None
            except StandardError as e:
                logging.exception('%s.first_run() return %r', self.__class__, e)
        self.__class__.setup = BaseHTTPServer.BaseHTTPRequestHandler.setup
        self.__class__.do_CONNECT = self.__class__.do_METHOD
        self.__class__.do_GET = self.__class__.do_METHOD
        self.__class__.do_PUT = self.__class__.do_METHOD
        self.__class__.do_POST = self.__class__.do_METHOD
        self.__class__.do_HEAD = self.__class__.do_METHOD
        self.__class__.do_DELETE = self.__class__.do_METHOD
        self.__class__.do_OPTIONS = self.__class__.do_METHOD
        self.setup()

    def handle_one_request(self):
        # Optionally sniff the first byte: 0x80/0x16 indicate an SSL/TLS
        # handshake, in which case the transport is upgraded in place.
        if not self.disable_transport_ssl and self.scheme == 'http':
            leadbyte = self.connection.recv(1, socket.MSG_PEEK)
            if leadbyte in ('\x80', '\x16'):
                server_name = ''
                if leadbyte == '\x16':
                    # Peek (without consuming) until a full ClientHello is
                    # buffered, then pull the SNI name for cert selection.
                    for _ in xrange(2):
                        leaddata = self.connection.recv(1024, socket.MSG_PEEK)
                        if is_clienthello(leaddata):
                            try:
                                server_name = extract_sni_name(leaddata)
                            finally:
                                break
                try:
                    certfile = CertUtil.get_cert(server_name or 'www.google.com')
                    ssl_sock = ssl.wrap_socket(self.connection, ssl_version=self.ssl_version, keyfile=certfile, certfile=certfile, server_side=True)
                except StandardError as e:
                    if e.args[0] not in (errno.ECONNABORTED, errno.ECONNRESET):
                        logging.exception('ssl.wrap_socket(self.connection=%r) failed: %s', self.connection, e)
                    return
                self.connection = ssl_sock
                self.rfile = self.connection.makefile('rb', self.bufsize)
                self.wfile = self.connection.makefile('wb', 0)
                self.scheme = 'https'
        return BaseHTTPServer.BaseHTTPRequestHandler.handle_one_request(self)

    def first_run(self):
        # Hook for subclasses; executed once per class by setup().
        pass

    def gethostbyname2(self, hostname):
        # Return the address list for *hostname* via the system resolver.
        return socket.gethostbyname_ex(hostname)[-1]

    def create_tcp_connection(self, hostname, port, timeout, **kwargs):
        return socket.create_connection((hostname, port), timeout)

    def create_ssl_connection(self, hostname, port, timeout, **kwargs):
        sock = self.create_tcp_connection(hostname, port, timeout, **kwargs)
        ssl_sock = ssl.wrap_socket(sock, ssl_version=self.ssl_version)
        return ssl_sock

    def create_http_request(self, method, url, headers, body, timeout, **kwargs):
        """Issue *method* *url* directly with httplib and return the response."""
        scheme, netloc, path, query, _ = urlparse.urlsplit(url)
        if netloc.rfind(':') <= netloc.rfind(']'):
            # no port number
            host = netloc
            port = 443 if scheme == 'https' else 80
        else:
            host, _, port = netloc.rpartition(':')
            port = int(port)
        if query:
            path += '?' + query
        if 'Host' not in headers:
            headers['Host'] = host
        if body and 'Content-Length' not in headers:
            headers['Content-Length'] = str(len(body))
        ConnectionType = httplib.HTTPSConnection if scheme == 'https' else httplib.HTTPConnection
        connection = ConnectionType(netloc, timeout=timeout)
        connection.request(method, path, body=body, headers=headers)
        response = connection.getresponse()
        return response

    def create_http_request_withserver(self, fetchserver, method, url, headers, body, timeout, **kwargs):
        # Relay the request through a remote GAE/PHP fetchserver.
        return URLFetch(self, fetchserver).fetch(method, url, headers, body, timeout, **kwargs)

    def handle_urlfetch_error(self, fetchserver, response):
        # Hook for subclasses (e.g. appid rotation on errors).
        pass

    def handle_urlfetch_response_close(self, fetchserver, response):
        # Hook for subclasses (e.g. connection recycling).
        pass

    def parse_header(self):
        """Populate self.host/self.port (and normalize self.path) from the request."""
        if self.command == 'CONNECT':
            netloc = self.path
        elif self.path[0] == '/':
            # Origin-form request: rebuild an absolute URL from Host.
            netloc = self.headers.get('Host', 'localhost')
            self.path = '%s://%s%s' % (self.scheme, netloc, self.path)
        else:
            netloc = urlparse.urlsplit(self.path).netloc
        m = re.match(r'^(.+):(\d+)$', netloc)
        if m:
            self.host = m.group(1).strip('[]')
            self.port = int(m.group(2))
        else:
            self.host = netloc
            self.port = 443 if self.scheme == 'https' else 80

    def forward_socket(self, local, remote, timeout):
        """Pump bytes between *local* and *remote* until idle timeout or error.

        Both sockets are always closed on exit.
        """
        try:
            tick = 1
            bufsize = self.bufsize
            timecount = timeout
            while 1:
                timecount -= tick
                if timecount <= 0:
                    break
                (ins, _, errors) = select.select([local, remote], [], [local, remote], tick)
                if errors:
                    break
                for sock in ins:
                    data = sock.recv(bufsize)
                    if not data:
                        # NOTE(review): this break only leaves the inner for
                        # loop; the outer while exits via the idle countdown.
                        break
                    if sock is remote:
                        local.sendall(data)
                        timecount = timeout
                    else:
                        remote.sendall(data)
                        timecount = timeout
        except socket.timeout:
            pass
        except NetWorkIOError as e:
            if e.args[0] not in (errno.ECONNABORTED, errno.ECONNRESET, errno.ENOTCONN, errno.EPIPE):
                raise
            if e.args[0] in (errno.EBADF,):
                return
        finally:
            for sock in (remote, local):
                try:
                    sock.close()
                except StandardError:
                    pass

    def MOCK(self, status, headers, content):
        """mock response"""
        logging.info('%s "MOCK %s %s %s" %d %d', self.address_string(), self.command, self.path, self.protocol_version, status, len(content))
        headers = dict((k.title(), v) for k, v in headers.items())
        if 'Transfer-Encoding' in headers:
            del headers['Transfer-Encoding']
        if 'Content-Length' not in headers:
            headers['Content-Length'] = len(content)
        if 'Connection' not in headers:
            headers['Connection'] = 'close'
        self.send_response(status)
        for key, value in headers.items():
            self.send_header(key, value)
        self.end_headers()
        self.wfile.write(content)

    def STRIP(self, do_ssl_handshake=True, sticky_filter=None):
        """strip connect"""
        # Answer the CONNECT locally with a forged certificate so the
        # decrypted request can be re-dispatched through do_METHOD().
        certfile = CertUtil.get_cert(self.host)
        logging.info('%s "STRIP %s %s:%d %s" - -', self.address_string(), self.command, self.host, self.port, self.protocol_version)
        self.send_response(200)
        self.end_headers()
        if do_ssl_handshake:
            try:
                # ssl_sock = ssl.wrap_socket(self.connection, ssl_version=self.ssl_version, keyfile=certfile, certfile=certfile, server_side=True)
                # bugfix for youtube-dl
                ssl_sock = ssl.wrap_socket(self.connection, keyfile=certfile, certfile=certfile, server_side=True)
            except StandardError as e:
                if e.args[0] not in (errno.ECONNABORTED, errno.ECONNRESET):
                    logging.exception('ssl.wrap_socket(self.connection=%r) failed: %s', self.connection, e)
                return
            self.connection = ssl_sock
            self.rfile = self.connection.makefile('rb', self.bufsize)
            self.wfile = self.connection.makefile('wb', 0)
            self.scheme = 'https'
        try:
            # Read and parse the first request from inside the tunnel.
            self.raw_requestline = self.rfile.readline(65537)
            if len(self.raw_requestline) > 65536:
                self.requestline = ''
                self.request_version = ''
                self.command = ''
                self.send_error(414)
                return
            if not self.raw_requestline:
                self.close_connection = 1
                return
            if not self.parse_request():
                return
        except NetWorkIOError as e:
            if e.args[0] not in (errno.ECONNABORTED, errno.ECONNRESET, errno.EPIPE):
                raise
        self.sticky_filter = sticky_filter
        try:
            self.do_METHOD()
        except NetWorkIOError as e:
            if e.args[0] not in (errno.ECONNABORTED, errno.ETIMEDOUT, errno.EPIPE):
                raise

    def FORWARD(self, hostname, port, timeout, kwargs={}):
        """forward socket"""
        # NOTE(review): the mutable default kwargs={} is popped/assigned
        # below, so state can leak between calls that omit the argument --
        # TODO confirm all callers pass an explicit dict.
        do_ssl_handshake = kwargs.pop('do_ssl_handshake', False)
        local = self.connection
        remote = None
        self.send_response(200)
        self.end_headers()
        self.close_connection = 1
        # Read the client's first payload before dialing out.
        data = local.recv(1024)
        if not data:
            local.close()
            return
        data_is_clienthello = is_clienthello(data)
        if data_is_clienthello:
            # Hand the ClientHello to the connection factory so it can be
            # replayed on the chosen upstream socket.
            kwargs['client_hello'] = data
        max_retry = kwargs.get('max_retry', 5)
        for i in xrange(max_retry):
            try:
                if do_ssl_handshake:
                    remote = self.create_ssl_connection(hostname, port, timeout, **kwargs)
                else:
                    remote = self.create_tcp_connection(hostname, port, timeout, **kwargs)
                if not data_is_clienthello and remote and not isinstance(remote, Exception):
                    remote.sendall(data)
                break
            except StandardError as e:
                logging.exception('%s "FWD %s %s:%d %s" %r', self.address_string(), self.command, hostname, port, self.protocol_version, e)
                if hasattr(remote, 'close'):
                    remote.close()
                if i == max_retry - 1:
                    raise
        logging.info('%s "FWD %s %s:%d %s" - -', self.address_string(), self.command, hostname, port, self.protocol_version)
        if hasattr(remote, 'fileno'):
            # reset timeout default to avoid long http upload failure, but it will delay timeout retry :(
            remote.settimeout(None)
        del kwargs
        # Flush any server bytes the factory buffered during the handshake.
        data = data_is_clienthello and getattr(remote, 'data', None)
        if data:
            del remote.data
            local.sendall(data)
        self.forward_socket(local, remote, self.max_timeout)

    def DIRECT(self, kwargs):
        """Fetch the request directly (no fetchserver) and stream the reply back."""
        method = self.command
        if 'url' in kwargs:
            url = kwargs.pop('url')
        elif self.path.lower().startswith(('http://', 'https://', 'ftp://')):
            url = self.path
        else:
            url = 'http://%s%s' % (self.headers['Host'], self.path)
        headers = dict((k.title(), v) for k, v in self.headers.items())
        body = self.body
        response = None
        try:
            response = self.create_http_request(method, url, headers, body, timeout=self.connect_timeout, **kwargs)
            logging.info('%s "DIRECT %s %s %s" %s %s', self.address_string(), self.command, url, self.protocol_version, response.status, response.getheader('Content-Length', '-'))
            response_headers = dict((k.title(), v) for k, v in response.getheaders())
            self.send_response(response.status)
            for key, value in response.getheaders():
                self.send_header(key, value)
            self.end_headers()
            if self.command == 'HEAD' or response.status in (204, 304):
                # These replies carry no body by definition.
                response.close()
                return
            # Re-chunk the body if the upstream reply was chunked.
            need_chunked = 'Transfer-Encoding' in response_headers
            while True:
                data = response.read(8192)
                if not data:
                    if need_chunked:
                        self.wfile.write('0\r\n\r\n')
                    break
                if need_chunked:
                    self.wfile.write('%x\r\n' % len(data))
                self.wfile.write(data)
                if need_chunked:
                    self.wfile.write('\r\n')
                del data
        except (ssl.SSLError, socket.timeout, socket.error):
            if response:
                if response.fp and response.fp._sock:
                    response.fp._sock.close()
                response.close()
        finally:
            if response:
                response.close()

    def URLFETCH(self, fetchservers, max_retry=5, kwargs={}):
        """urlfetch from fetchserver"""
        # NOTE(review): kwargs={} is a shared mutable default -- TODO confirm
        # callers always pass their own dict.
        method = self.command
        if self.path[0] == '/':
            url = '%s://%s%s' % (self.scheme, self.headers['Host'], self.path)
        elif self.path.lower().startswith(('http://', 'https://', 'ftp://')):
            url = self.path
        else:
            raise ValueError('URLFETCH %r is not a valid url' % self.path)
        headers = dict((k.title(), v) for k, v in self.headers.items())
        body = self.body
        response = None
        errors = []
        fetchserver = fetchservers[0]
        # Retry across fetchservers until one answers with app_status < 400.
        for i in xrange(max_retry):
            try:
                response = self.create_http_request_withserver(fetchserver, method, url, headers, body, timeout=60, **kwargs)
                if response.app_status < 400:
                    break
                else:
                    self.handle_urlfetch_error(fetchserver, response)
                    if i < max_retry - 1:
                        if len(fetchservers) > 1:
                            fetchserver = random.choice(fetchservers[1:])
                        logging.info('URLFETCH return %d, trying fetchserver=%r', response.app_status, fetchserver)
                        response.close()
            except StandardError as e:
                errors.append(e)
                logging.info('URLFETCH "%s %s" fetchserver=%r %r, retry...', method, url, fetchserver, e)
        if len(errors) == max_retry:
            # Every attempt raised: surface a synthetic error page.
            if response and response.app_status >= 500:
                status = response.app_status
                headers = dict(response.getheaders())
                content = response.read()
                response.close()
            else:
                status = 502
                headers = {'Content-Type': 'text/html'}
                content = message_html('502 URLFetch failed', 'Local URLFetch %r failed' % url, '<br>'.join(repr(x) for x in errors))
            return self.MOCK(status, headers, content)
        #logging.info('%s "URL %s %s %s" %s %s', self.address_string(), method, url, self.protocol_version, response.status, response.getheader('Content-Length', '-'))
        try:
            if response.status == 206:
                # Partial content: hand off to the multi-threaded range fetcher.
                return self.RANGEFETCH(response, fetchservers, **kwargs)
            if response.app_header_parsed:
                self.close_connection = not response.getheader('Content-Length')
            self.send_response(response.status)
            for key, value in response.getheaders():
                if key.title() == 'Transfer-Encoding':
                    continue
                self.send_header(key, value)
            self.end_headers()
            bufsize = 8192
            while True:
                data = response.read(bufsize)
                if data:
                    self.wfile.write(data)
                if not data:
                    self.handle_urlfetch_response_close(fetchserver, response)
                    response.close()
                    break
                del data
        except NetWorkIOError as e:
            if e[0] in (errno.ECONNABORTED, errno.EPIPE) or 'bad write retry' in repr(e):
                return

    def RANGEFETCH(self, response, fetchservers, **kwargs):
        return RangeFetch(self, response, fetchservers, **kwargs).fetch()

    def do_METHOD(self):
        """Common entry point for every HTTP verb (bound in setup())."""
        self.parse_header()
        self.body = self.rfile.read(int(self.headers['Content-Length'])) if 'Content-Length' in self.headers else ''
        # The sticky filter (set by STRIP) takes precedence for this connection.
        if self.sticky_filter:
            action = self.sticky_filter.filter(self)
            if action:
                return action.pop(0)(*action)
        # First filter returning an action wins; the action list is
        # [callable, arg1, arg2, ...].
        for handler_filter in self.handler_filters:
            action = handler_filter.filter(self)
            if action:
                return action.pop(0)(*action)
class RangeFetch(object):
    """Range Fetch Class"""
    # Number of concurrent fetch threads.
    threads = 2
    # Size of each requested byte range (4MB).
    maxsize = 1024*1024*4
    # Read chunk size from each upstream response.
    bufsize = 8192
    # Used to stagger thread start-up relative to expected throughput.
    waitsize = 1024*512

    def __init__(self, handler, response, fetchservers, **kwargs):
        self.handler = handler
        self.url = handler.path
        # The initial 206 response whose first range seeds the queue.
        self.response = response
        self.fetchservers = fetchservers
        self.kwargs = kwargs
        # Set to True to tell worker threads to stop.
        self._stopped = None
        # fetchserver -> last relay status, used to back off failing servers.
        self._last_app_status = {}
        # Next byte offset the client is waiting for.
        self.expect_begin = 0

    def fetch(self):
        """Drive the whole range download: split ranges, spawn workers, and
        stream in-order chunks to the client."""
        response_status = self.response.status
        response_headers = dict((k.title(), v) for k, v in self.response.getheaders())
        content_range = response_headers['Content-Range']
        #content_length = response_headers['Content-Length']
        start, end, length = tuple(int(x) for x in re.search(r'bytes (\d+)-(\d+)/(\d+)', content_range).group(1, 2, 3))
        if start == 0:
            # Client asked from the beginning: present a plain 200 download.
            response_status = 200
            response_headers['Content-Length'] = str(length)
            del response_headers['Content-Range']
        else:
            response_headers['Content-Range'] = 'bytes %s-%s/%s' % (start, end, length)
            response_headers['Content-Length'] = str(length-start)
        logging.info('>>>>>>>>>>>>>>> RangeFetch started(%r) %d-%d', self.url, start, end)
        self.handler.send_response(response_status)
        for key, value in response_headers.items():
            self.handler.send_header(key, value)
        self.handler.end_headers()
        # data_queue: (offset, bytes) chunks ready to send, ordered by offset.
        # range_queue: (start, end, response-or-None) work items for threads.
        data_queue = Queue.PriorityQueue()
        range_queue = Queue.PriorityQueue()
        range_queue.put((start, end, self.response))
        self.expect_begin = start
        for begin in range(end+1, length, self.maxsize):
            range_queue.put((begin, min(begin+self.maxsize-1, length-1), None))
        for i in xrange(0, self.threads):
            range_delay_size = i * self.maxsize
            spawn_later(float(range_delay_size)/self.waitsize, self.__fetchlet, range_queue, data_queue, range_delay_size)
        has_peek = hasattr(data_queue, 'peek')
        peek_timeout = 120
        # Consumer loop: emit chunks strictly in offset order.
        while self.expect_begin < length - 1:
            try:
                if has_peek:
                    begin, data = data_queue.peek(timeout=peek_timeout)
                    if self.expect_begin == begin:
                        data_queue.get()
                    elif self.expect_begin < begin:
                        # Chunk arrived early; wait for the missing one.
                        time.sleep(0.1)
                        continue
                    else:
                        logging.error('RangeFetch Error: begin(%r) < expect_begin(%r), quit.', begin, self.expect_begin)
                        break
                else:
                    begin, data = data_queue.get(timeout=peek_timeout)
                    if self.expect_begin == begin:
                        pass
                    elif self.expect_begin < begin:
                        # Not our turn yet: push it back and wait.
                        data_queue.put((begin, data))
                        time.sleep(0.1)
                        continue
                    else:
                        logging.error('RangeFetch Error: begin(%r) < expect_begin(%r), quit.', begin, self.expect_begin)
                        break
            except Queue.Empty:
                logging.error('data_queue peek timeout, break')
                break
            try:
                self.handler.wfile.write(data)
                self.expect_begin += len(data)
                del data
            except StandardError as e:
                logging.info('RangeFetch client connection aborted(%s).', e)
                break
        self._stopped = True

    def __fetchlet(self, range_queue, data_queue, range_delay_size):
        """Worker: repeatedly take a byte range, fetch it through a
        fetchserver, and feed chunks into data_queue; failed ranges are
        re-queued."""
        headers = dict((k.title(), v) for k, v in self.handler.headers.items())
        headers['Connection'] = 'close'
        while 1:
            try:
                if self._stopped:
                    return
                try:
                    start, end, response = range_queue.get(timeout=1)
                    # Throttle when the consumer is far behind (>30MB buffered).
                    if self.expect_begin < start and data_queue.qsize() * self.bufsize + range_delay_size > 30*1024*1024:
                        range_queue.put((start, end, response))
                        time.sleep(10)
                        continue
                    headers['Range'] = 'bytes=%d-%d' % (start, end)
                    fetchserver = ''
                    if not response:
                        fetchserver = random.choice(self.fetchservers)
                        if self._last_app_status.get(fetchserver, 200) >= 500:
                            # Back off a server that recently failed.
                            time.sleep(5)
                        response = self.handler.create_http_request_withserver(fetchserver, self.handler.command, self.url, headers, self.handler.body, timeout=self.handler.connect_timeout, **self.kwargs)
                except Queue.Empty:
                    continue
                except StandardError as e:
                    logging.warning("Response %r in __fetchlet", e)
                    range_queue.put((start, end, None))
                    continue
                if not response:
                    logging.warning('RangeFetch %s return %r', headers['Range'], response)
                    range_queue.put((start, end, None))
                    continue
                gae_appid = ''
                if fetchserver:
                    self._last_app_status[fetchserver] = response.app_status
                    gae_appid = urlparse.urlsplit(fetchserver).netloc.split('.')[-3]
                if response.app_status != 200:
                    logging.warning('Range Fetch "%s %s" %s return %s', self.handler.command, self.url, headers['Range'], response.app_status)
                    response.close()
                    range_queue.put((start, end, None))
                    continue
                if response.getheader('Location'):
                    # Follow redirects by updating the shared URL and retrying.
                    self.url = urlparse.urljoin(self.url, response.getheader('Location'))
                    logging.info('RangeFetch Redirect(%r)', self.url)
                    response.close()
                    range_queue.put((start, end, None))
                    continue
                if 200 <= response.status < 300:
                    content_range = response.getheader('Content-Range')
                    if not content_range:
                        logging.warning('RangeFetch "%s %s" return Content-Range=%r: response headers=%r', self.handler.command, self.url, content_range, response.getheaders())
                        response.close()
                        range_queue.put((start, end, None))
                        continue
                    content_length = int(response.getheader('Content-Length', 0))
                    logging.info('>>>>>>>>>>>>>>> [thread %s] %s %s %s %s', threading.currentThread().ident, content_length, content_range, gae_appid, self.kwargs['options'])
                    # Stream this range into the data queue chunk by chunk.
                    while 1:
                        try:
                            if self._stopped:
                                response.close()
                                return
                            data = response.read(self.bufsize)
                            if not data:
                                break
                            data_queue.put((start, data))
                            start += len(data)
                        except StandardError as e:
                            logging.warning('RangeFetch "%s %s" %s failed: %s', self.handler.command, self.url, headers['Range'], e)
                            break
                    if start < end + 1:
                        # Short read: re-queue the remainder of the range.
                        logging.warning('RangeFetch "%s %s" retry %s-%s', self.handler.command, self.url, start, end)
                        response.close()
                        range_queue.put((start, end, None))
                        continue
                    logging.info('>>>>>>>>>>>>>>> Successfully reached %d bytes.', start - 1)
                else:
                    logging.error('RangeFetch %r return %s', self.url, response.status)
                    response.close()
                    range_queue.put((start, end, None))
                    continue
            except StandardError as e:
                logging.exception('RangeFetch._fetchlet error:%s', e)
                raise
class AdvancedProxyHandler(SimpleProxyHandler):
    """Advanced Proxy Handler"""
    # DNS results memoized here by gethostbyname2(); LRUCache is a project class
    dns_cache = LRUCache(64*1024)
    # remote DNS servers to query via dnslib; empty -> OS resolver
    dns_servers = []
    # IPs treated as poisoned DNS answers (passed as blacklist to dnslib_resolve_*)
    dns_blacklist = []
    # per-ipaddr timing stats used to rank candidate addresses when dialing
    tcp_connection_time = collections.defaultdict(float)
    tcp_connection_time_with_clienthello = collections.defaultdict(float)
    # per-cache_key queues of (timestamp, socket) kept for reuse
    tcp_connection_cache = collections.defaultdict(Queue.PriorityQueue)
    ssl_connection_time = collections.defaultdict(float)
    ssl_connection_cache = collections.defaultdict(Queue.PriorityQueue)
    # per-ipaddr reputation dicts maintained by create_ssl_connection
    ssl_connection_good_ipaddrs = {}
    ssl_connection_bad_ipaddrs = {}
    ssl_connection_unknown_ipaddrs = {}
    # when True, create_http_request tags responses so sockets can be recycled
    ssl_connection_keepalive = False
    # base number of parallel connection attempts per dial round
    max_window = 4
    openssl_context = OpenSSL.SSL.Context(OpenSSL.SSL.TLSv1_METHOD)
  1426. def gethostbyname2(self, hostname):
  1427. try:
  1428. iplist = self.dns_cache[hostname]
  1429. except KeyError:
  1430. if re.match(r'^\d+\.\d+\.\d+\.\d+$', hostname) or ':' in hostname:
  1431. iplist = [hostname]
  1432. elif self.dns_servers:
  1433. try:
  1434. record = dnslib_resolve_over_udp(hostname, self.dns_servers, timeout=2, blacklist=self.dns_blacklist)
  1435. except socket.gaierror:
  1436. record = dnslib_resolve_over_tcp(hostname, self.dns_servers, timeout=2, blacklist=self.dns_blacklist)
  1437. iplist = dnslib_record2iplist(record)
  1438. else:
  1439. iplist = socket.gethostbyname_ex(hostname)[-1]
  1440. self.dns_cache[hostname] = iplist
  1441. return iplist
    def create_tcp_connection(self, hostname, port, timeout, **kwargs):
        """Open a TCP connection to hostname:port by racing several candidate IPs.

        kwargs:
            client_hello: optional raw bytes to send right after connect (the
                server's reply is peeked); such sockets are never cached.
            cache_key: key into tcp_connection_cache for socket reuse.
            max_retry: number of parallel-dial rounds (default 5).

        Returns the first successfully connected socket; extra winners are
        drained in the background and either cached or closed.  Raises the
        last socket error if all rounds fail.
        """
        client_hello = kwargs.get('client_hello', None)
        # a connection primed with client_hello cannot be reused, so disable caching
        cache_key = kwargs.get('cache_key') if not client_hello else None
        def create_connection(ipaddr, timeout, queobj):
            # worker: dial one candidate address, report socket or exception via queobj
            sock = None
            try:
                # create a ipv4/ipv6 socket object
                sock = socket.socket(socket.AF_INET if ':' not in ipaddr[0] else socket.AF_INET6)
                # set reuseaddr option to avoid 10048 socket error
                sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
                # resize socket recv buffer 8K->32K to improve browser related application performance
                sock.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, 32*1024)
                # disable nagle algorithm to send http request quickly.
                sock.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, True)
                # set a short timeout to trigger timeout retry more quickly.
                sock.settimeout(min(self.connect_timeout, timeout))
                # start connection time record
                start_time = time.time()
                # TCP connect
                sock.connect(ipaddr)
                # record TCP connection time
                self.tcp_connection_time[ipaddr] = time.time() - start_time
                # send client hello and peek server hello
                if client_hello:
                    sock.sendall(client_hello)
                    if gevent and isinstance(sock, gevent.socket.socket):
                        # gevent sockets lack MSG_PEEK; stash the data on the socket instead
                        sock.data = data = sock.recv(4096)
                    else:
                        data = sock.recv(4096, socket.MSG_PEEK)
                    if not data:
                        logging.debug('create_tcp_connection %r with client_hello return NULL byte, continue %r', ipaddr, time.time()-start_time)
                        raise socket.timeout('timed out')
                    # record TCP connection time with client hello
                    self.tcp_connection_time_with_clienthello[ipaddr] = time.time() - start_time
                # set timeout
                sock.settimeout(timeout)
                # put tcp socket object to output queobj
                queobj.put(sock)
            except (socket.error, OSError) as e:
                # any socket.error, put Exceptions to output queobj.
                queobj.put(e)
                # reset a large and random timeout to the ipaddr so it sorts last next round
                self.tcp_connection_time[ipaddr] = self.connect_timeout+random.random()
                # close tcp socket
                if sock:
                    sock.close()
        def close_connection(count, queobj, first_tcp_time):
            # reaper: drain the `count` still-pending workers; keep sockets that
            # connected almost as fast as the winner in the cache, close the rest
            for _ in range(count):
                sock = queobj.get()
                tcp_time_threshold = min(1, 1.3 * first_tcp_time)
                if sock and not isinstance(sock, Exception):
                    ipaddr = sock.getpeername()
                    if cache_key and self.tcp_connection_time[ipaddr] < tcp_time_threshold:
                        cache_queue = self.tcp_connection_cache[cache_key]
                        if cache_queue.qsize() < 8:
                            try:
                                _, old_sock = cache_queue.get_nowait()
                                old_sock.close()
                            except Queue.Empty:
                                pass
                        cache_queue.put((time.time(), sock))
                    else:
                        sock.close()
        # first, try to reuse a cached connection younger than 30 seconds
        try:
            while cache_key:
                ctime, sock = self.tcp_connection_cache[cache_key].get_nowait()
                if time.time() - ctime < 30:
                    return sock
                else:
                    sock.close()
        except Queue.Empty:
            pass
        addresses = [(x, port) for x in self.gethostbyname2(hostname)]
        sock = None
        for _ in range(kwargs.get('max_retry', 5)):
            window = min((self.max_window+1)//2, len(addresses))
            # rank candidates by historical connect time (with or without hello)
            if client_hello:
                addresses.sort(key=self.tcp_connection_time_with_clienthello.__getitem__)
            else:
                addresses.sort(key=self.tcp_connection_time.__getitem__)
            # dial the `window` fastest plus `window` random candidates in parallel
            addrs = addresses[:window] + random.sample(addresses, window)
            queobj = gevent.queue.Queue() if gevent else Queue.Queue()
            for addr in addrs:
                thread.start_new_thread(create_connection, (addr, timeout, queobj))
            for i in range(len(addrs)):
                sock = queobj.get()
                if not isinstance(sock, Exception):
                    first_tcp_time = self.tcp_connection_time[sock.getpeername()] if not cache_key else 0
                    thread.start_new_thread(close_connection, (len(addrs)-i-1, queobj, first_tcp_time))
                    return sock
                elif i == 0:
                    # only output first error
                    logging.warning('create_tcp_connection to %r with %s return %r, try again.', hostname, addrs, sock)
        if isinstance(sock, Exception):
            raise sock
  1537. def create_ssl_connection(self, hostname, port, timeout, **kwargs):
  1538. cache_key = kwargs.get('cache_key')
  1539. validate = kwargs.get('validate')
  1540. def create_connection(ipaddr, timeout, queobj):
  1541. sock = None
  1542. ssl_sock = None
  1543. try:
  1544. # create a ipv4/ipv6 socket object
  1545. sock = socket.socket(socket.AF_INET if ':' not in ipaddr[0] else socket.AF_INET6)
  1546. # set reuseaddr option to avoid 10048 socket error
  1547. sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
  1548. # resize socket recv buffer 8K->32K to improve browser releated application performance
  1549. sock.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, 32*1024)
  1550. # disable negal algorithm to send http request quickly.
  1551. sock.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, True)
  1552. # set a short timeout to trigger timeout retry more quickly.
  1553. sock.settimeout(min(self.connect_timeout, timeout))
  1554. # pick up the certificate
  1555. if not validate:
  1556. ssl_sock = ssl.wrap_socket(sock, ssl_version=self.ssl_version, do_handshake_on_connect=False)
  1557. else:
  1558. ssl_sock = ssl.wrap_socket(sock, ssl_version=self.ssl_version, cert_reqs=ssl.CERT_REQUIRED, ca_certs=os.path.join(os.path.dirname(os.path.abspath(__file__)), 'cacert.pem'), do_handshake_on_connect=False)
  1559. ssl_sock.settimeout(min(self.connect_timeout, timeout))
  1560. # start connection time record
  1561. start_time = time.time()
  1562. # TCP connect
  1563. ssl_sock.connect(ipaddr)
  1564. connected_time = time.time()
  1565. # SSL handshake
  1566. ssl_sock.do_handshake()
  1567. handshaked_time = time.time()
  1568. # record TCP connection time
  1569. self.tcp_connection_time[ipaddr] = ssl_sock.tcp_time = connected_time - start_time
  1570. # record SSL connection time
  1571. self.ssl_connection_time[ipaddr] = ssl_sock.ssl_time = handshaked_time - start_time
  1572. ssl_sock.ssl_time = connected_time - start_time
  1573. # sometimes, we want to use raw tcp socket directly(select/epoll), so setattr it to ssl socket.
  1574. ssl_sock.sock = sock
  1575. # remove from bad/unknown ipaddrs dict
  1576. self.ssl_connection_bad_ipaddrs.pop(ipaddr, None)
  1577. self.ssl_connection_unknown_ipaddrs.pop(ipaddr, None)
  1578. # add to good ipaddrs dict
  1579. if ipaddr not in self.ssl_connection_good_ipaddrs:
  1580. self.ssl_connection_good_ipaddrs[ipaddr] = handshaked_time
  1581. # verify SSL certificate.
  1582. if validate and hostname.endswith('.appspot.com'):
  1583. cert = ssl_sock.getpeercert()
  1584. orgname = next((v for ((k, v),) in cert['subject'] if k == 'organizationName'))
  1585. if not orgname.lower().startswith('google '):
  1586. raise ssl.SSLError("%r certificate organizationName(%r) not startswith 'Google'" % (hostname, orgname))
  1587. # set timeout
  1588. ssl_sock.settimeout(timeout)
  1589. # put ssl socket object to output queobj
  1590. queobj.put(ssl_sock)
  1591. except (socket.error, ssl.SSLError, OSError) as e:
  1592. # any socket.error, put Excpetions to output queobj.
  1593. queobj.put(e)
  1594. # reset a large and random timeout to the ipaddr
  1595. self.ssl_connection_time[ipaddr] = self.connect_timeout + random.random()
  1596. # add to bad ipaddrs dict
  1597. if ipaddr not in self.ssl_connection_bad_ipaddrs:
  1598. self.ssl_connection_bad_ipaddrs[ipaddr] = time.time()
  1599. # remove from good/unknown ipaddrs dict
  1600. self.ssl_connection_good_ipaddrs.pop(ipaddr, None)
  1601. self.ssl_connection_unknown_ipaddrs.pop(ipaddr, None)
  1602. # close ssl socket
  1603. if ssl_sock:
  1604. ssl_sock.close()
  1605. # close tcp socket
  1606. if sock:
  1607. sock.close()
  1608. def create_connection_withopenssl(ipaddr, timeout, queobj):
  1609. sock = None
  1610. ssl_sock = None
  1611. try:
  1612. # create a ipv4/ipv6 socket object
  1613. sock = socket.socket(socket.AF_INET if ':' not in ipaddr[0] else socket.AF_INET6)
  1614. # set reuseaddr option to avoid 10048 socket error
  1615. sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
  1616. # resize socket recv buffer 8K->32K to improve browser releated application performance
  1617. sock.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, 32*1024)
  1618. # disable negal algorithm to send http request quickly.
  1619. sock.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, True)
  1620. # set a short timeout to trigger timeout retry more quickly.
  1621. sock.settimeout(timeout or self.connect_timeout)
  1622. # pick up the certificate
  1623. server_hostname = b'www.googleapis.com' if hostname.endswith('.appspot.com') else None
  1624. ssl_sock = SSLConnection(self.openssl_context, sock)
  1625. ssl_sock.set_connect_state()
  1626. if server_hostname and hasattr(ssl_sock, 'set_tlsext_host_name'):
  1627. ssl_sock.set_tlsext_host_name(server_hostname)
  1628. # start connection time record
  1629. start_time = time.time()
  1630. # TCP connect
  1631. ssl_sock.connect(ipaddr)
  1632. connected_time = time.time()
  1633. # SSL handshake
  1634. ssl_sock.do_handshake()
  1635. handshaked_time = time.time()
  1636. # record TCP connection time
  1637. self.tcp_connection_time[ipaddr] = ssl_sock.tcp_time = connected_time - start_time
  1638. # record SSL connection time
  1639. self.ssl_connection_time[ipaddr] = ssl_sock.ssl_time = handshaked_time - start_time
  1640. # sometimes, we want to use raw tcp socket directly(select/epoll), so setattr it to ssl socket.
  1641. ssl_sock.sock = sock
  1642. # remove from bad/unknown ipaddrs dict
  1643. self.ssl_connection_bad_ipaddrs.pop(ipaddr, None)
  1644. self.ssl_connection_unknown_ipaddrs.pop(ipaddr, None)
  1645. # add to good ipaddrs dict
  1646. if ipaddr not in self.ssl_connection_good_ipaddrs:
  1647. self.ssl_connection_good_ipaddrs[ipaddr] = handshaked_time
  1648. # verify SSL certificate.
  1649. if validate and hostname.endswith('.appspot.com'):
  1650. cert = ssl_sock.get_peer_certificate()
  1651. commonname = next((v for k, v in cert.get_subject().get_components() if k == 'CN'))
  1652. if '.google' not in commonname and not commonname.endswith('.appspot.com'):
  1653. raise socket.error("Host name '%s' doesn't match certificate host '%s'" % (hostname, commonname))
  1654. # put ssl socket object to output queobj
  1655. queobj.put(ssl_sock)
  1656. except (socket.error, OpenSSL.SSL.Error, OSError) as e:
  1657. # any socket.error, put Excpetions to output queobj.
  1658. queobj.put(e)
  1659. # reset a large and random timeout to the ipaddr
  1660. self.ssl_connection_time[ipaddr] = self.connect_timeout + random.random()
  1661. # add to bad ipaddrs dict
  1662. if ipaddr not in self.ssl_connection_bad_ipaddrs:
  1663. self.ssl_connection_bad_ipaddrs[ipaddr] = time.time()
  1664. # remove from good/unknown ipaddrs dict
  1665. self.ssl_connection_good_ipaddrs.pop(ipaddr, None)
  1666. self.ssl_connection_unknown_ipaddrs.pop(ipaddr, None)
  1667. # close ssl socket
  1668. if ssl_sock:
  1669. ssl_sock.close()
  1670. # close tcp socket
  1671. if sock:
  1672. sock.close()
  1673. def close_connection(count, queobj, first_tcp_time, first_ssl_time):
  1674. for _ in range(count):
  1675. sock = queobj.get()
  1676. ssl_time_threshold = min(1, 1.3 * first_ssl_time)
  1677. if sock and not isinstance(sock, Exception):
  1678. if cache_key and sock.ssl_time < ssl_time_threshold:
  1679. cache_queue = self.ssl_connection_cache[cache_key]
  1680. if cache_queue.qsize() < 8:
  1681. try:
  1682. _, old_sock = cache_queue.get_nowait()
  1683. old_sock.close()
  1684. except Queue.Empty:
  1685. pass
  1686. cache_queue.put((time.time(), sock))
  1687. else:
  1688. sock.close()
  1689. def reorg_ipaddrs():
  1690. current_time = time.time()
  1691. for ipaddr, ctime in self.ssl_connection_good_ipaddrs.items():
  1692. if current_time - ctime > 4 * 60:
  1693. self.ssl_connection_good_ipaddrs.pop(ipaddr, None)
  1694. self.ssl_connection_unknown_ipaddrs[ipaddr] = ctime
  1695. for ipaddr, ctime in self.ssl_connection_bad_ipaddrs.items():
  1696. if current_time - ctime > 6 * 60:
  1697. self.ssl_connection_bad_ipaddrs.pop(ipaddr, None)
  1698. self.ssl_connection_unknown_ipaddrs[ipaddr] = ctime
  1699. logging.info("good_ipaddrs=%d, bad_ipaddrs=%d, unkown_ipaddrs=%d", len(self.ssl_connection_good_ipaddrs), len(self.ssl_connection_bad_ipaddrs), len(self.ssl_connection_unknown_ipaddrs))
  1700. try:
  1701. while cache_key:
  1702. ctime, sock = self.ssl_connection_cache[cache_key].get_nowait()
  1703. if time.time() - ctime < 30:
  1704. return sock
  1705. else:
  1706. sock.close()
  1707. except Queue.Empty:
  1708. pass
  1709. addresses = [(x, port) for x in self.gethostbyname2(hostname)]
  1710. sock = None
  1711. for i in range(kwargs.get('max_retry', 10)):
  1712. reorg_ipaddrs()
  1713. window = self.max_window + i
  1714. good_ipaddrs = [x for x in addresses if x in self.ssl_connection_good_ipaddrs]
  1715. good_ipaddrs = sorted(good_ipaddrs, key=self.ssl_connection_time.get)[:window]
  1716. unkown_ipaddrs = [x for x in addresses if x not in self.ssl_connection_good_ipaddrs and x not in self.ssl_connection_bad_ipaddrs]
  1717. random.shuffle(unkown_ipaddrs)
  1718. unkown_ipaddrs = unkown_ipaddrs[:window]
  1719. bad_ipaddrs = [x for x in addresses if x in self.ssl_connection_bad_ipaddrs]
  1720. bad_ipaddrs = sorted(bad_ipaddrs, key=self.ssl_connection_bad_ipaddrs.get)[:window]
  1721. addrs = good_ipaddrs + unkown_ipaddrs + bad_ipaddrs
  1722. remain_window = 3 * window - len(addrs)
  1723. if 0 < remain_window <= len(addresses):
  1724. addrs += random.sample(addresses, remain_window)
  1725. logging.debug('%s good_ipaddrs=%d, unkown_ipaddrs=%r, bad_ipaddrs=%r', cache_key, len(good_ipaddrs), len(unkown_ipaddrs), len(bad_ipaddrs))
  1726. queobj = gevent.queue.Queue() if gevent else Queue.Queue()
  1727. for addr in addrs:
  1728. thread.start_new_thread(create_connection_withopenssl, (addr, timeout, queobj))
  1729. for i in range(len(addrs)):
  1730. sock = queobj.get()
  1731. if not isinstance(sock, Exception):
  1732. thread.start_new_thread(close_connection, (len(addrs)-i-1, queobj, sock.tcp_time, sock.ssl_time))
  1733. return sock
  1734. elif i == 0:
  1735. # only output first error
  1736. logging.warning('create_ssl_connection to %r with %s return %r, try again.', hostname, addrs, sock)
  1737. if isinstance(sock, Exception):
  1738. raise sock
    def create_http_request(self, method, url, headers, body, timeout, max_retry=5, bufsize=8192, crlf=None, validate=None, cache_key=None):
        """Send an HTTP request over a freshly dialed (or cached) connection
        and return an httplib.HTTPResponse, or None if the CRLF-padding
        responses could not be drained.

        When `crlf` is truthy on a plain-HTTP request, several fake
        'GET / HTTP/1.1' requests are prepended as keep-alive padding
        (anti-interference technique); their responses are read and discarded
        before the real response is parsed.
        """
        scheme, netloc, path, query, _ = urlparse.urlsplit(url)
        if netloc.rfind(':') <= netloc.rfind(']'):
            # no port number (also covers bracketed IPv6 literals without port)
            host = netloc
            port = 443 if scheme == 'https' else 80
        else:
            host, _, port = netloc.rpartition(':')
            port = int(port)
        if query:
            path += '?' + query
        if 'Host' not in headers:
            headers['Host'] = host
        if body and 'Content-Length' not in headers:
            headers['Content-Length'] = str(len(body))
        sock = None
        # dial with retry; re-raise the last failure
        for i in range(max_retry):
            try:
                create_connection = self.create_ssl_connection if scheme == 'https' else self.create_tcp_connection
                sock = create_connection(host, port, timeout, validate=validate, cache_key=cache_key)
                break
            except StandardError as e:
                logging.exception('create_http_request "%s %s" failed:%s', method, url, e)
                if sock:
                    sock.close()
                if i == max_retry - 1:
                    raise
        request_data = ''
        crlf_counter = 0
        if scheme != 'https' and crlf:
            # build sanitized fake headers for the padding requests
            fakeheaders = dict((k.title(), v) for k, v in headers.items())
            fakeheaders.pop('Content-Length', None)
            fakeheaders.pop('Cookie', None)
            fakeheaders.pop('Host', None)
            if 'User-Agent' not in fakeheaders:
                fakeheaders['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1878.0 Safari/537.36'
            if 'Accept-Language' not in fakeheaders:
                fakeheaders['Accept-Language'] = 'zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.4'
            if 'Accept' not in fakeheaders:
                fakeheaders['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
            fakeheaders_data = ''.join('%s: %s\r\n' % (k, v) for k, v in fakeheaders.items() if k not in self.skip_headers)
            # pad until at least 5 fake requests AND ~3000 bytes are queued
            # (the `or` keeps looping until BOTH bounds are met)
            while crlf_counter < 5 or len(request_data) < 1500 * 2:
                request_data += 'GET / HTTP/1.1\r\n%s\r\n' % fakeheaders_data
                crlf_counter += 1
            request_data += '\r\n\r\n\r\n'
        request_data += '%s %s %s\r\n' % (method, path, self.protocol_version)
        request_data += ''.join('%s: %s\r\n' % (k.title(), v) for k, v in headers.items() if k.title() not in self.skip_headers)
        request_data += '\r\n'
        # body may be a byte string or a file-like object (streamed in chunks)
        if isinstance(body, bytes):
            sock.sendall(request_data.encode() + body)
        elif hasattr(body, 'read'):
            sock.sendall(request_data)
            while 1:
                data = body.read(bufsize)
                if not data:
                    break
                sock.sendall(data)
        else:
            raise TypeError('create_http_request(body) must be a string or buffer, not %r' % type(body))
        response = None
        # drain (and discard) the responses to the padding requests
        try:
            while crlf_counter:
                if sys.version[:3] == '2.7':
                    response = httplib.HTTPResponse(sock, buffering=False)
                else:
                    response = httplib.HTTPResponse(sock)
                    response.fp.close()
                    # unbuffered makefile so no real-response bytes get swallowed
                    response.fp = sock.makefile('rb', 0)
                response.begin()
                response.read()
                response.close()
                crlf_counter -= 1
        except StandardError as e:
            logging.exception('crlf skip read host=%r path=%r error: %r', headers.get('Host'), path, e)
            if response:
                if response.fp and response.fp._sock:
                    response.fp._sock.close()
                response.close()
            if sock:
                sock.close()
            return None
        # parse the real response (buffered where supported)
        if sys.version[:3] == '2.7':
            response = httplib.HTTPResponse(sock, buffering=True)
        else:
            response = httplib.HTTPResponse(sock)
            response.fp.close()
            response.fp = sock.makefile('rb')
        response.begin()
        if self.ssl_connection_keepalive and scheme == 'https' and cache_key:
            # tag the response so handle_urlfetch_response_close() can recycle the socket
            response.cache_key = cache_key
            response.cache_sock = response.fp._sock
        return response
  1831. def handle_urlfetch_response_close(self, fetchserver, response):
  1832. cache_sock = getattr(response, 'cache_sock', None)
  1833. if cache_sock:
  1834. if self.scheme == 'https':
  1835. self.ssl_connection_cache[response.cache_key].put((time.time(), cache_sock))
  1836. else:
  1837. cache_sock.close()
  1838. del response.cache_sock
    def handle_urlfetch_error(self, fetchserver, response):
        """Hook invoked when a urlfetch via `fetchserver` fails; default
        implementation does nothing."""
        pass
class Common(object):
    """Global Config Object"""
    # environment variables named GOAGENT_<SECTION>_<OPTION> override the
    # corresponding proxy.ini option (see __init__)
    ENV_CONFIG_PREFIX = 'GOAGENT_'
    def __init__(self):
        """load config from proxy.ini"""
        # relax the option regex so values may contain ':' (hosts entries)
        ConfigParser.RawConfigParser.OPTCRE = re.compile(r'(?P<option>[^=\s][^=]*)\s*(?P<vi>[=])\s*(?P<value>.*)$')
        self.CONFIG = ConfigParser.ConfigParser()
        # proxy.ini, then proxy.user.ini, then proxy.my.ini (later files win)
        self.CONFIG_FILENAME = os.path.splitext(os.path.abspath(__file__))[0]+'.ini'
        self.CONFIG_USER_FILENAME = re.sub(r'\.ini$', '.user.ini', self.CONFIG_FILENAME)
        self.CONFIG_MY_FILENAME = re.sub(r'\.ini$', '.my.ini', self.CONFIG_FILENAME)
        self.CONFIG.read([self.CONFIG_FILENAME, self.CONFIG_USER_FILENAME, self.CONFIG_MY_FILENAME])
        # GOAGENT_<SECTION>_<OPTION> environment variables override ini options
        for key, value in os.environ.items():
            m = re.match(r'^%s([A-Z]+)_([A-Z\_\-]+)$' % self.ENV_CONFIG_PREFIX, key)
            if m:
                self.CONFIG.set(m.group(1).lower(), m.group(2).lower(), value)
        # --- [listen] ---
        self.LISTEN_IP = self.CONFIG.get('listen', 'ip')
        self.LISTEN_PORT = self.CONFIG.getint('listen', 'port')
        self.LISTEN_USERNAME = self.CONFIG.get('listen', 'username') if self.CONFIG.has_option('listen', 'username') else ''
        self.LISTEN_PASSWORD = self.CONFIG.get('listen', 'password') if self.CONFIG.has_option('listen', 'password') else ''
        self.LISTEN_VISIBLE = self.CONFIG.getint('listen', 'visible')
        self.LISTEN_DEBUGINFO = self.CONFIG.getint('listen', 'debuginfo')
        # --- [gae] ---
        self.GAE_APPIDS = re.findall(r'[\w\-\.]+', self.CONFIG.get('gae', 'appid').replace('.appspot.com', ''))
        if len(self.GAE_APPIDS) > 50 : random.shuffle(self.GAE_APPIDS)
        self.GAE_PASSWORD = self.CONFIG.get('gae', 'password').strip()
        self.GAE_PATH = self.CONFIG.get('gae', 'path')
        self.GAE_MODE = self.CONFIG.get('gae', 'mode')
        self.GAE_PROFILE = self.CONFIG.get('gae', 'profile').strip()
        self.GAE_WINDOW = self.CONFIG.getint('gae', 'window')
        self.GAE_KEEPALIVE = self.CONFIG.getint('gae', 'keepalive') if self.CONFIG.has_option('gae', 'keepalive') else 0
        self.GAE_OBFUSCATE = self.CONFIG.getint('gae', 'obfuscate')
        self.GAE_VALIDATE = self.CONFIG.getint('gae', 'validate')
        self.GAE_TRANSPORT = self.CONFIG.getint('gae', 'transport') if self.CONFIG.has_option('gae', 'transport') else 0
        self.GAE_OPTIONS = self.CONFIG.get('gae', 'options')
        self.GAE_REGIONS = set(x.upper() for x in self.CONFIG.get('gae', 'regions').split('|') if x.strip())
        self.GAE_SSLVERSION = self.CONFIG.get('gae', 'sslversion')
        self.GAE_PAGESPEED = self.CONFIG.getint('gae', 'pagespeed') if self.CONFIG.has_option('gae', 'pagespeed') else 0
        # --- [rangefetch] (falls back to the [gae] values when absent) ---
        if self.CONFIG.has_section('rangefetch'):
            self.RANGEFETCH_APPIDS = re.findall(r'[\w\-\.]+', self.CONFIG.get('rangefetch', 'appid').replace('.appspot.com', '')) if self.CONFIG.has_option('rangefetch', 'appid') else self.GAE_APPIDS
            if len(self.RANGEFETCH_APPIDS) > 50 : random.shuffle(self.RANGEFETCH_APPIDS)
            self.RANGEFETCH_PASSWORD = self.CONFIG.get('rangefetch', 'password').strip() if self.CONFIG.has_option('rangefetch', 'password') else self.GAE_PASSWORD
            self.RANGEFETCH_PATH = self.CONFIG.get('rangefetch', 'path') if self.CONFIG.has_option('rangefetch', 'path') else self.GAE_PATH
            self.RANGEFETCH_OPTIONS = self.CONFIG.get('rangefetch', 'options') if self.CONFIG.has_option('rangefetch', 'options') else self.GAE_OPTIONS
        else:
            self.RANGEFETCH_APPIDS = self.GAE_APPIDS
            self.RANGEFETCH_PASSWORD = self.GAE_PASSWORD
            self.RANGEFETCH_PATH = self.GAE_PATH
            self.RANGEFETCH_OPTIONS = self.GAE_OPTIONS
        # probe IPv6 connectivity (Google public DNS) to auto-pick a profile
        if self.GAE_PROFILE == 'auto':
            try:
                socket.create_connection(('2001:4860:4860::8888', 53), timeout=1).close()
                logging.info('Use profile ipv6')
                self.GAE_PROFILE = 'ipv6'
            except socket.error as e:
                logging.info('Fail try profile ipv6 %r, fallback ipv4', e)
                self.GAE_PROFILE = 'ipv4'
        # --- [<profile>/hosts] and [<profile>/http] ---
        hosts_section, http_section = '%s/hosts' % self.GAE_PROFILE, '%s/http' % self.GAE_PROFILE
        if 'USERDNSDOMAIN' in os.environ and re.match(r'^\w+\.\w+$', os.environ['USERDNSDOMAIN']):
            self.CONFIG.set(hosts_section, '.' + os.environ['USERDNSDOMAIN'], '')
        # exact-host / host-suffix / host:port / regex mapping tables
        self.HOST_MAP = collections.OrderedDict((k, v or k) for k, v in self.CONFIG.items(hosts_section) if '\\' not in k and ':' not in k and not k.startswith('.'))
        self.HOST_POSTFIX_MAP = collections.OrderedDict((k, v) for k, v in self.CONFIG.items(hosts_section) if '\\' not in k and ':' not in k and k.startswith('.'))
        self.HOST_POSTFIX_ENDSWITH = tuple(self.HOST_POSTFIX_MAP)
        self.HOSTPORT_MAP = collections.OrderedDict((k, v) for k, v in self.CONFIG.items(hosts_section) if ':' in k and not k.startswith('.'))
        self.HOSTPORT_POSTFIX_MAP = collections.OrderedDict((k, v) for k, v in self.CONFIG.items(hosts_section) if ':' in k and k.startswith('.'))
        self.HOSTPORT_POSTFIX_ENDSWITH = tuple(self.HOSTPORT_POSTFIX_MAP)
        self.URLRE_MAP = collections.OrderedDict((re.compile(k).match, v) for k, v in self.CONFIG.items(hosts_section) if '\\' in k)
        self.HTTP_WITHGAE = tuple(x for x in self.CONFIG.get(http_section, 'withgae').split('|') if x)
        self.HTTP_CRLFSITES = tuple(self.CONFIG.get(http_section, 'crlfsites').split('|'))
        self.HTTP_FORCEHTTPS = tuple(self.CONFIG.get(http_section, 'forcehttps').split('|')) if self.CONFIG.get(http_section, 'forcehttps').strip() else tuple()
        self.HTTP_NOFORCEHTTPS = tuple(self.CONFIG.get(http_section, 'noforcehttps').split('|')) if self.CONFIG.get(http_section, 'noforcehttps').strip() else set()
        self.HTTP_FAKEHTTPS = tuple(self.CONFIG.get(http_section, 'fakehttps').split('|')) if self.CONFIG.get(http_section, 'fakehttps').strip() else tuple()
        self.HTTP_NOFAKEHTTPS = tuple(self.CONFIG.get(http_section, 'nofakehttps').split('|')) if self.CONFIG.get(http_section, 'nofakehttps').strip() else set()
        self.HTTP_DNS = self.CONFIG.get(http_section, 'dns').split('|') if self.CONFIG.has_option(http_section, 'dns') else []
        # --- [iplist] (named IP pools; identity host entries become pools too) ---
        self.IPLIST_MAP = collections.OrderedDict((k, v.split('|')) for k, v in self.CONFIG.items('iplist'))
        self.IPLIST_MAP.update((k, [k]) for k, v in self.HOST_MAP.items() if k == v)
        # --- [pac] ---
        self.PAC_ENABLE = self.CONFIG.getint('pac', 'enable')
        self.PAC_IP = self.CONFIG.get('pac', 'ip')
        self.PAC_PORT = self.CONFIG.getint('pac', 'port')
        self.PAC_FILE = self.CONFIG.get('pac', 'file').lstrip('/')
        self.PAC_WHITELIST = self.CONFIG.has_option('pac', 'whitelist')
        self.PAC_GFWLIST = self.CONFIG.get('pac', 'whitelist') if self.PAC_WHITELIST else self.CONFIG.get('pac', 'gfwlist')
        self.PAC_ADBLOCK = self.CONFIG.get('pac', 'adblock') if self.CONFIG.has_option('pac', 'adblock') else ''
        self.PAC_ADMODE = self.CONFIG.getint('pac', 'admode')
        self.PAC_EXPIRED = self.CONFIG.getint('pac', 'expired')
        # --- [php] ---
        self.PHP_ENABLE = self.CONFIG.getint('php', 'enable')
        self.PHP_LISTEN = self.CONFIG.get('php', 'listen')
        self.PHP_PASSWORD = self.CONFIG.get('php', 'password') if self.CONFIG.has_option('php', 'password') else ''
        self.PHP_CRLF = self.CONFIG.getint('php', 'crlf') if self.CONFIG.has_option('php', 'crlf') else 1
        self.PHP_VALIDATE = self.CONFIG.getint('php', 'validate') if self.CONFIG.has_option('php', 'validate') else 0
        self.PHP_FETCHSERVER = self.CONFIG.get('php', 'fetchserver')
        self.PHP_USEHOSTS = self.CONFIG.getint('php', 'usehosts')
        # --- [proxy] (upstream proxy) ---
        self.PROXY_ENABLE = self.CONFIG.getint('proxy', 'enable')
        self.PROXY_HOST = self.CONFIG.get('proxy', 'host')
        self.PROXY_PORT = self.CONFIG.getint('proxy', 'port')
        self.PROXY_USERNAME = self.CONFIG.get('proxy', 'username')
        # NOTE(review): attribute name is a long-standing typo ('PASSWROD');
        # kept as-is because other modules may read it by this name.
        self.PROXY_PASSWROD = self.CONFIG.get('proxy', 'password')
        if self.PROXY_ENABLE:
            # an upstream proxy forces https mode for GAE traffic
            self.GAE_MODE = 'https'
        # --- [autorange] (range-fetch heuristics) ---
        self.AUTORANGE_HOSTS = self.CONFIG.get('autorange', 'hosts').split('|')
        self.AUTORANGE_HOSTS_MATCH = [re.compile(fnmatch.translate(h)).match for h in self.AUTORANGE_HOSTS]
        self.AUTORANGE_ENDSWITH = tuple(self.CONFIG.get('autorange', 'endswith').split('|'))
        self.AUTORANGE_NOENDSWITH = tuple(self.CONFIG.get('autorange', 'noendswith').split('|'))
        self.AUTORANGE_MAXSIZE = self.CONFIG.getint('autorange', 'maxsize')
        self.AUTORANGE_WAITSIZE = self.CONFIG.getint('autorange', 'waitsize')
        self.AUTORANGE_BUFSIZE = self.CONFIG.getint('autorange', 'bufsize')
        self.AUTORANGE_THREADS = self.CONFIG.getint('autorange', 'threads')
        # --- [fetchmax] ---
        self.FETCHMAX_LOCAL = self.CONFIG.getint('fetchmax', 'local') if self.CONFIG.get('fetchmax', 'local') else 3
        self.FETCHMAX_SERVER = self.CONFIG.get('fetchmax', 'server')
        # --- [dns] ---
        self.DNS_ENABLE = self.CONFIG.getint('dns', 'enable')
        self.DNS_LISTEN = self.CONFIG.get('dns', 'listen')
        self.DNS_SERVERS = self.HTTP_DNS or self.CONFIG.get('dns', 'servers').split('|')
        self.DNS_BLACKLIST = set(self.CONFIG.get('dns', 'blacklist').split('|'))
        self.DNS_TCPOVER = tuple(self.CONFIG.get('dns', 'tcpover').split('|')) if self.CONFIG.get('dns', 'tcpover').strip() else tuple()
        # --- [useragent] / [love] ---
        self.USERAGENT_ENABLE = self.CONFIG.getint('useragent', 'enable')
        self.USERAGENT_STRING = self.CONFIG.get('useragent', 'string')
        self.LOVE_ENABLE = self.CONFIG.getint('love', 'enable')
        self.LOVE_TIP = self.CONFIG.get('love', 'tip').encode('utf8').decode('unicode-escape').split('|')
  1957. def extend_iplist(self, iplist_name, hosts):
  1958. logging.info('extend_iplist start for hosts=%s', hosts)
  1959. new_iplist = []
  1960. def do_remote_resolve(host, dnsserver, queue):
  1961. assert isinstance(dnsserver, basestring)
  1962. for dnslib_resolve in (dnslib_resolve_over_udp, dnslib_resolve_over_tcp):
  1963. try:
  1964. time.sleep(random.random())
  1965. iplist = dnslib_record2iplist(dnslib_resolve(host, [dnsserver], timeout=4, blacklist=self.DNS_BLACKLIST))
  1966. queue.put((host, dnsserver, iplist))
  1967. except (socket.error, OSError) as e:
  1968. logging.debug('%r remote host=%r failed: %s', dnslib_resolve, host, e)
  1969. time.sleep(1)
  1970. result_queue = Queue.Queue()
  1971. for host in hosts:
  1972. for dnsserver in self.DNS_SERVERS:
  1973. logging.debug('remote resolve host=%r from dnsserver=%r', host, dnsserver)
  1974. thread.start_new_thread(do_remote_resolve, (host, dnsserver, result_queue))
  1975. for _ in xrange(len(self.DNS_SERVERS) * len(hosts) * 2):
  1976. try:
  1977. host, dnsserver, iplist = result_queue.get(timeout=16)
  1978. logging.debug('%r remote host=%r return %s', dnsserver, host, iplist)
  1979. if host.endswith('.google.com'):
  1980. iplist = [x for x in iplist if is_google_ip(x)]
  1981. new_iplist += iplist
  1982. except Queue.Empty:
  1983. break
  1984. logging.info('extend_iplist finished, added %s', len(set(self.IPLIST_MAP[iplist_name])-set(new_iplist)))
  1985. self.IPLIST_MAP[iplist_name] = list(set(self.IPLIST_MAP[iplist_name] + new_iplist))
def resolve_iplist(self):
    """Resolve every non-IP entry in self.IPLIST_MAP to concrete addresses.

    Entries that are already IP literals are kept as-is; hostnames are
    resolved locally (one thread per host).  'google_*' lists get extra
    filtering and blacklisting, and 'google_hk' is additionally extended
    via remote DNS in the background.  Exits the process if a list ends
    up empty.
    """
    # https://support.google.com/websearch/answer/186669?hl=zh-Hans
    def do_local_resolve(host, queue):
        # Resolve via the system resolver, retrying up to 3 times.
        assert isinstance(host, basestring)
        for _ in xrange(3):
            try:
                queue.put((host, socket.gethostbyname_ex(host)[-1]))
            except (socket.error, OSError) as e:
                logging.warning('socket.gethostbyname_ex host=%r failed: %s', host, e)
                time.sleep(0.1)
    # NOTE(review): 216.239.32.20 is presumably a Google redirect/sorry
    # address that must never be used as a front-end -- confirm.
    google_blacklist = ['216.239.32.20'] + list(self.DNS_BLACKLIST)
    for name, need_resolve_hosts in list(self.IPLIST_MAP.items()):
        # Skip lists that already contain only IPv4 literals or host:port/IPv6.
        if all(re.match(r'\d+\.\d+\.\d+\.\d+', x) or ':' in x for x in need_resolve_hosts):
            continue
        need_resolve_remote = [x for x in need_resolve_hosts if ':' not in x and not re.match(r'\d+\.\d+\.\d+\.\d+', x)]
        resolved_iplist = [x for x in need_resolve_hosts if x not in need_resolve_remote]
        result_queue = Queue.Queue()
        for host in need_resolve_remote:
            logging.debug('local resolve host=%r', host)
            thread.start_new_thread(do_local_resolve, (host, result_queue))
        for _ in xrange(len(need_resolve_remote)):
            try:
                host, iplist = result_queue.get(timeout=8)
                if host.endswith('.google.com'):
                    # Keep only verified Google front-end IPs.
                    iplist = [x for x in iplist if is_google_ip(x)]
                resolved_iplist += iplist
            except Queue.Empty:
                break
        if name == 'google_hk':
            # Keep growing this list in the background via remote DNS.
            for delay in (1, 60, 150, 240, 300, 450, 600, 900):
                spawn_later(delay, self.extend_iplist, name, need_resolve_remote)
        if name.startswith('google_') and name not in ('google_cn', 'google_hk') and resolved_iplist:
            # Keep only IPs sharing the first answer's leading component.
            iplist_prefix = re.split(r'[\.:]', resolved_iplist[0])[0]
            resolved_iplist = list(set(x for x in resolved_iplist if x.startswith(iplist_prefix)))
        else:
            resolved_iplist = list(set(resolved_iplist))
        if name.startswith('google_'):
            resolved_iplist = list(set(resolved_iplist) - set(google_blacklist))
        if len(resolved_iplist) == 0:
            # Hard-fail: an empty iplist would make the proxy unusable.
            logging.error('resolve %s host return empty! please retry!', name)
            sys.exit(-1)
        logging.info('resolve name=%s host to iplist=%r', name, resolved_iplist)
        common.IPLIST_MAP[name] = resolved_iplist
def info(self):
    """Build the human-readable startup banner summarizing the running
    configuration (versions, listen address, GAE/PHP/PAC/DNS settings)."""
    info = ''
    info += '------------------------------------------------------\n'
    info += 'GoAgent Version : %s (python/%s %spyopenssl/%s)\n' % (__version__, sys.version[:5], gevent and 'gevent/%s ' % gevent.__version__ or '', getattr(OpenSSL, '__version__', 'Disabled'))
    info += 'Uvent Version : %s (pyuv/%s libuv/%s)\n' % (__import__('uvent').__version__, __import__('pyuv').__version__, __import__('pyuv').LIBUV_VERSION) if all(x in sys.modules for x in ('pyuv', 'uvent')) else ''
    info += 'Listen Address : %s:%d\n' % (self.LISTEN_IP, self.LISTEN_PORT)
    info += 'Local Proxy : %s:%s\n' % (self.PROXY_HOST, self.PROXY_PORT) if self.PROXY_ENABLE else ''
    info += 'Debug INFO : %s\n' % self.LISTEN_DEBUGINFO if self.LISTEN_DEBUGINFO else ''
    info += 'GAE Mode : %s\n' % self.GAE_MODE
    info += 'GAE Profile : %s\n' % self.GAE_PROFILE if self.GAE_PROFILE else ''
    info += 'GAE APPID : %s\n' % '|'.join(self.GAE_APPIDS)
    info += 'GAE Validate : %s\n' % self.GAE_VALIDATE if self.GAE_VALIDATE else ''
    info += 'GAE Obfuscate : %s\n' % self.GAE_OBFUSCATE if self.GAE_OBFUSCATE else ''
    if common.PAC_ENABLE:
        info += 'Pac Server : http://%s:%d/%s\n' % (self.PAC_IP if self.PAC_IP and self.PAC_IP != '0.0.0.0' else ProxyUtil.get_listen_ip(), self.PAC_PORT, self.PAC_FILE)
        info += 'Pac File : file://%s\n' % os.path.abspath(self.PAC_FILE)
    if common.PHP_ENABLE:
        info += 'PHP Listen : %s\n' % common.PHP_LISTEN
        info += 'PHP FetchServer : %s\n' % common.PHP_FETCHSERVER
    if common.DNS_ENABLE:
        info += 'DNS Listen : %s\n' % common.DNS_LISTEN
        info += 'DNS Servers : %s\n' % '|'.join(common.DNS_SERVERS)
    info += '------------------------------------------------------\n'
    return info
# Global configuration singleton shared by all handlers/filters below.
common = Common()
  2054. def message_html(title, banner, detail=''):
  2055. MESSAGE_TEMPLATE = '''
  2056. <html><head>
  2057. <meta http-equiv="content-type" content="text/html;charset=utf-8">
  2058. <title>$title</title>
  2059. <style><!--
  2060. body {font-family: arial,sans-serif}
  2061. div.nav {margin-top: 1ex}
  2062. div.nav A {font-size: 10pt; font-family: arial,sans-serif}
  2063. span.nav {font-size: 10pt; font-family: arial,sans-serif; font-weight: bold}
  2064. div.nav A,span.big {font-size: 12pt; color: #0000cc}
  2065. div.nav A {font-size: 10pt; color: black}
  2066. A.l:link {color: #6f6f6f}
  2067. A.u:link {color: green}
  2068. //--></style>
  2069. </head>
  2070. <body text=#000000 bgcolor=#ffffff>
  2071. <table border=0 cellpadding=2 cellspacing=0 width=100%>
  2072. <tr><td bgcolor=#3366cc><font face=arial,sans-serif color=#ffffff><b>Message From LocalProxy</b></td></tr>
  2073. <tr><td> </td></tr></table>
  2074. <blockquote>
  2075. <H1>$banner</H1>
  2076. $detail
  2077. <p>
  2078. </blockquote>
  2079. <table width=100% cellpadding=0 cellspacing=0><tr><td bgcolor=#3366cc><img alt="" width=1 height=4></td></tr></table>
  2080. </body></html>
  2081. '''
  2082. return string.Template(MESSAGE_TEMPLATE).substitute(title=title, banner=banner, detail=detail)
  2083. try:
  2084. from Crypto.Cipher.ARC4 import new as RC4Cipher
  2085. except ImportError:
  2086. logging.warn('Load Crypto.Cipher.ARC4 Failed, Use Pure Python Instead.')
  2087. class RC4Cipher(object):
  2088. def __init__(self, key):
  2089. x = 0
  2090. box = range(256)
  2091. for i, y in enumerate(box):
  2092. x = (x + y + ord(key[i % len(key)])) & 0xff
  2093. box[i], box[x] = box[x], y
  2094. self.__box = box
  2095. self.__x = 0
  2096. self.__y = 0
  2097. def encrypt(self, data):
  2098. out = []
  2099. out_append = out.append
  2100. x = self.__x
  2101. y = self.__y
  2102. box = self.__box
  2103. for char in data:
  2104. x = (x + 1) & 0xff
  2105. y = (y + box[x]) & 0xff
  2106. box[x], box[y] = box[y], box[x]
  2107. out_append(chr(ord(char) ^ box[(box[x] + box[y]) & 0xff]))
  2108. self.__x = x
  2109. self.__y = y
  2110. return ''.join(out)
  2111. def read_random_bits(nbits):
  2112. '''Reads 'nbits' random bits.
  2113. If nbits isn't a whole number of bytes, an extra byte will be appended with
  2114. only the lower bits set.
  2115. '''
  2116. nbytes, rbits = divmod(nbits, 8)
  2117. # Get the random bytes
  2118. randomdata = os.urandom(nbytes)
  2119. # Add the remaining random bits
  2120. if rbits > 0:
  2121. randomvalue = ord(os.urandom(1))
  2122. randomvalue >>= (8 - rbits)
  2123. randomdata = byte(randomvalue) + randomdata
  2124. return randomdata
  2125. def generate_RSA(bits=2048):
  2126. '''
  2127. Generate an RSA keypair with an exponent of 65537 in PEM format
  2128. param: bits The key length in bits
  2129. Return private key and public key
  2130. '''
  2131. from Crypto.PublicKey import RSA
  2132. new_key = RSA.generate(bits, e=65537)
  2133. public_key = new_key.publickey().exportKey("PEM")
  2134. private_key = new_key.exportKey("PEM")
  2135. print private_key
  2136. print public_key
  2137. class XORCipher(object):
  2138. """XOR Cipher Class"""
  2139. def __init__(self, key):
  2140. self.__key_gen = itertools.cycle([ord(x) for x in key]).next
  2141. self.__key_xor = lambda s: ''.join(chr(ord(x) ^ self.__key_gen()) for x in s)
  2142. if len(key) == 1:
  2143. try:
  2144. from Crypto.Util.strxor import strxor_c
  2145. c = ord(key)
  2146. self.__key_xor = lambda s: strxor_c(s, c)
  2147. except ImportError:
  2148. sys.stderr.write('Load Crypto.Util.strxor Failed, Use Pure Python Instead.\n')
  2149. def encrypt(self, data):
  2150. return self.__key_xor(data)
class CipherFileObject(object):
    """fileobj wrapper for cipher"""
    def __init__(self, fileobj, cipher):
        # NOTE: within the class body these names are mangled to
        # _CipherFileObject__fileobj / _CipherFileObject__cipher.
        self.__fileobj = fileobj
        self.__cipher = cipher
    def __getattr__(self, attr):
        # Delegate unknown attributes to the wrapped file object.
        # NOTE(review): the guard compares against the *unmangled* names,
        # which __getattr__ never actually receives for internal lookups;
        # it is effectively dead but harmless -- kept as-is.
        if attr not in ('__fileobj', '__cipher'):
            return getattr(self.__fileobj, attr)
    def read(self, size=-1):
        # Read from the underlying file and pass it through the cipher
        # (encrypt == decrypt for the stream ciphers used here).
        return self.__cipher.encrypt(self.__fileobj.read(size))
class LocalProxyServer(SocketServer.ThreadingTCPServer):
    """Local Proxy Server"""
    allow_reuse_address = True
    daemon_threads = True
    def close_request(self, request):
        # Best-effort close; the peer may already have disconnected.
        try:
            request.close()
        except StandardError:
            pass
    def finish_request(self, request, client_address):
        # Dispatch to the handler, ignoring common client-disconnect errors.
        try:
            self.RequestHandlerClass(request, client_address, self)
        except NetWorkIOError as e:
            if e[0] not in (errno.ECONNABORTED, errno.ECONNRESET, errno.EPIPE):
                raise
    def handle_error(self, *args):
        """make ThreadingTCPServer happy"""
        exc_info = sys.exc_info()
        error = exc_info and len(exc_info) and exc_info[1]
        if isinstance(error, NetWorkIOError) and len(error.args) > 1 and 'bad write retry' in error.args[1]:
            # Suppress the noisy SSL 'bad write retry' traceback entirely.
            exc_info = error = None
        else:
            del exc_info, error
            SocketServer.ThreadingTCPServer.handle_error(self, *args)
  2185. class UserAgentFilter(BaseProxyHandlerFilter):
  2186. """user agent filter"""
  2187. def __init__(self, user_agent):
  2188. self.user_agent = user_agent
  2189. def filter(self, handler):
  2190. handler.headers['User-Agent'] = self.user_agent
  2191. class ForceHttpsFilter(BaseProxyHandlerFilter):
  2192. """force https filter"""
  2193. def __init__(self, forcehttps_sites, noforcehttps_sites):
  2194. self.forcehttps_sites = tuple(forcehttps_sites)
  2195. self.noforcehttps_sites = set(noforcehttps_sites)
  2196. def filter(self, handler):
  2197. if handler.command != 'CONNECT' and handler.host.endswith(self.forcehttps_sites) and handler.host not in self.noforcehttps_sites:
  2198. if not handler.headers.get('Referer', '').startswith('https://') and not handler.path.startswith('https://'):
  2199. logging.debug('ForceHttpsFilter metched %r %r', handler.path, handler.headers)
  2200. headers = {'Location': handler.path.replace('http://', 'https://', 1), 'Connection': 'close'}
  2201. return [handler.MOCK, 301, headers, '']
  2202. class FakeHttpsFilter(BaseProxyHandlerFilter):
  2203. """fake https filter"""
  2204. def __init__(self, fakehttps_sites, nofakehttps_sites):
  2205. self.fakehttps_sites = tuple(fakehttps_sites)
  2206. self.nofakehttps_sites = set(nofakehttps_sites)
  2207. def filter(self, handler):
  2208. if handler.command == 'CONNECT' and handler.host.endswith(self.fakehttps_sites) and handler.host not in self.nofakehttps_sites:
  2209. logging.debug('FakeHttpsFilter metched %r %r', handler.path, handler.headers)
  2210. return [handler.STRIP, True, None]
  2211. class URLRewriteFilter(BaseProxyHandlerFilter):
  2212. """url rewrite filter"""
  2213. rules = {
  2214. 'www.google.com': (r'^https?://www\.google\.com/url\?.*url=([^&]+)', lambda m: urllib.unquote_plus(m.group(1))),
  2215. 'www.google.com.hk': (r'^https?://www\.google\.com\.hk/url\?.*url=([^&]+)', lambda m: urllib.unquote_plus(m.group(1))),
  2216. }
  2217. def filter(self, handler):
  2218. if handler.host in self.rules:
  2219. pattern, callback = self.rules[handler.host]
  2220. m = re.search(pattern, handler.path)
  2221. if m:
  2222. logging.debug('URLRewriteFilter metched %r', handler.path)
  2223. headers = {'Location': callback(m), 'Connection': 'close'}
  2224. return [handler.MOCK, 301, headers, '']
  2225. class HostsFilter(BaseProxyHandlerFilter):
  2226. """force https filter"""
  2227. def filter_localfile(self, handler, filename):
  2228. content_type = None
  2229. try:
  2230. import mimetypes
  2231. content_type = mimetypes.types_map.get(os.path.splitext(filename)[1])
  2232. except StandardError as e:
  2233. logging.error('import mimetypes failed: %r', e)
  2234. try:
  2235. with open(filename, 'rb') as fp:
  2236. data = fp.read()
  2237. headers = {'Connection': 'close', 'Content-Length': str(len(data))}
  2238. if content_type:
  2239. headers['Content-Type'] = content_type
  2240. return [handler.MOCK, 200, headers, data]
  2241. except StandardError as e:
  2242. return [handler.MOCK, 403, {'Connection': 'close'}, 'read %r %r' % (filename, e)]
  2243. def filter(self, handler):
  2244. host, port = handler.host, handler.port
  2245. hostport = handler.path if handler.command == 'CONNECT' else '%s:%d' % (host, port)
  2246. hostname = ''
  2247. if host in common.HOST_MAP:
  2248. hostname = common.HOST_MAP[host] or host
  2249. elif host.endswith(common.HOST_POSTFIX_ENDSWITH):
  2250. hostname = next(common.HOST_POSTFIX_MAP[x] for x in common.HOST_POSTFIX_MAP if host.endswith(x)) or host
  2251. common.HOST_MAP[host] = hostname
  2252. if hostport in common.HOSTPORT_MAP:
  2253. hostname = common.HOSTPORT_MAP[hostport] or host
  2254. elif hostport.endswith(common.HOSTPORT_POSTFIX_ENDSWITH):
  2255. hostname = next(common.HOSTPORT_POSTFIX_MAP[x] for x in common.HOSTPORT_POSTFIX_MAP if hostport.endswith(x)) or host
  2256. common.HOSTPORT_MAP[hostport] = hostname
  2257. if handler.command != 'CONNECT' and common.URLRE_MAP:
  2258. try:
  2259. hostname = next(common.URLRE_MAP[x] for x in common.URLRE_MAP if x(handler.path)) or host
  2260. except StopIteration:
  2261. pass
  2262. if not hostname:
  2263. return None
  2264. elif hostname in common.IPLIST_MAP:
  2265. handler.dns_cache[host] = common.IPLIST_MAP[hostname]
  2266. elif hostname == host and host.endswith(common.DNS_TCPOVER) and host not in handler.dns_cache:
  2267. try:
  2268. iplist = dnslib_record2iplist(dnslib_resolve_over_tcp(host, handler.dns_servers, timeout=4, blacklist=handler.dns_blacklist))
  2269. logging.info('HostsFilter dnslib_resolve_over_tcp %r with %r return %s', host, handler.dns_servers, iplist)
  2270. handler.dns_cache[host] = iplist
  2271. except socket.error as e:
  2272. logging.debug('HostsFilter dnslib_resolve_over_tcp %r with %r failed: %r', host, handler.dns_servers, e)
  2273. elif re.match(r'^\d+\.\d+\.\d+\.\d+$', hostname) or ':' in hostname:
  2274. handler.dns_cache[host] = [hostname]
  2275. elif hostname.startswith('file://'):
  2276. filename = hostname.lstrip('file://')
  2277. if os.name == 'nt':
  2278. filename = filename.lstrip('/')
  2279. return self.filter_localfile(handler, filename)
  2280. cache_key = '%s:%s' % (hostname, port)
  2281. if handler.command == 'CONNECT':
  2282. return [handler.FORWARD, host, port, handler.connect_timeout, {'cache_key': cache_key}]
  2283. else:
  2284. if host.endswith(common.HTTP_CRLFSITES):
  2285. handler.close_connection = True
  2286. return [handler.DIRECT, {'crlf': True}]
  2287. else:
  2288. return [handler.DIRECT, {'cache_key': cache_key}]
class DirectRegionFilter(BaseProxyHandlerFilter):
    """direct region filter"""
    # GeoIP database is expected next to this script; geoip stays None when
    # pygeoip is unavailable or no direct regions are configured.
    geoip = pygeoip.GeoIP(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'GeoIP.dat')) if pygeoip and common.GAE_REGIONS else None
    region_cache = LRUCache(16*1024)
    def __init__(self, regions):
        self.regions = set(regions)
    def get_country_code(self, hostname, dnsservers):
        """http://dev.maxmind.com/geoip/legacy/codes/iso3166/"""
        # Cached lookup of the ISO country code for hostname's first IP;
        # failures are cached as '' so they are not retried per request.
        try:
            return self.region_cache[hostname]
        except KeyError:
            pass
        try:
            if re.match(r'^\d+\.\d+\.\d+\.\d+$', hostname) or ':' in hostname:
                iplist = [hostname]
            elif dnsservers:
                iplist = dnslib_record2iplist(dnslib_resolve_over_udp(hostname, dnsservers, timeout=2))
            else:
                iplist = socket.gethostbyname_ex(hostname)[-1]
            country_code = self.geoip.country_code_by_addr(iplist[0])
        except StandardError as e:
            logging.warning('DirectRegionFilter cannot determine region for hostname=%r %r', hostname, e)
            country_code = ''
        self.region_cache[hostname] = country_code
        return country_code
    def filter(self, handler):
        # Bypass GAE/PHP fetching when the destination country is listed
        # in the configured direct regions.
        if self.geoip:
            country_code = self.get_country_code(handler.host, handler.dns_servers)
            if country_code in self.regions:
                if handler.command == 'CONNECT':
                    return [handler.FORWARD, handler.host, handler.port, handler.connect_timeout]
                else:
                    return [handler.DIRECT, {}]
class AutoRangeFilter(BaseProxyHandlerFilter):
    """force https filter"""
    def filter(self, handler):
        """Clamp or inject a Range header so large responses are fetched in
        AUTORANGE_MAXSIZE windows (the rest is completed by RANGEFETCH)."""
        path = urlparse.urlsplit(handler.path).path
        need_autorange = any(x(handler.host) for x in common.AUTORANGE_HOSTS_MATCH) or path.endswith(common.AUTORANGE_ENDSWITH)
        # Opt-outs: excluded suffixes, explicit range= query, HEAD requests.
        if path.endswith(common.AUTORANGE_NOENDSWITH) or 'range=' in urlparse.urlsplit(path).query or handler.command == 'HEAD':
            need_autorange = False
        if handler.command != 'HEAD' and handler.headers.get('Range'):
            # The client already requested a range: clamp its window size.
            m = re.search(r'bytes=(\d+)-', handler.headers['Range'])
            start = int(m.group(1) if m else 0)
            handler.headers['Range'] = 'bytes=%d-%d' % (start, start+common.AUTORANGE_MAXSIZE-1)
            logging.info('autorange range=%r match url=%r', handler.headers['Range'], handler.path)
        elif need_autorange:
            # No client range but the URL matches: open our own first window.
            logging.info('Found [autorange]endswith match url=%r', handler.path)
            m = re.search(r'bytes=(\d+)-', handler.headers.get('Range', ''))
            start = int(m.group(1) if m else 0)
            handler.headers['Range'] = 'bytes=%d-%d' % (start, start+common.AUTORANGE_MAXSIZE-1)
class GAEFetchFilter(BaseProxyHandlerFilter):
    """force https filter"""
    def filter(self, handler):
        """https://developers.google.com/appengine/docs/python/urlfetch/"""
        if handler.command == 'CONNECT':
            # Strip TLS so the inner request can be re-filtered and fetched
            # via GAE; only handshake on ports that plausibly speak TLS.
            do_ssl_handshake = 440 <= handler.port <= 450 or 1024 <= handler.port <= 65535
            return [handler.STRIP, do_ssl_handshake, self if not common.URLRE_MAP else None]
        elif handler.command in ('GET', 'POST', 'HEAD', 'PUT', 'DELETE', 'PATCH'):
            # Verbs urlfetch supports are sent to the GAE fetchservers.
            kwargs = {}
            if common.GAE_PASSWORD:
                kwargs['password'] = common.GAE_PASSWORD
            if common.GAE_VALIDATE:
                kwargs['validate'] = 1
            if common.GAE_OPTIONS:
                kwargs['options'] = common.GAE_OPTIONS
                kwargs['rsa_key'] = __RSA_KEY__
            else:
                kwargs['options'] = ''
                kwargs['rsa_key'] = None
            fetchservers = ['%s://%s.appspot.com%s' % (common.GAE_MODE, x, common.GAE_PATH) for x in common.GAE_APPIDS]
            if 'googlevideo.com' in handler.path:
                # Video traffic goes through dedicated rangefetch appids.
                kwargs['password'] = common.RANGEFETCH_PASSWORD
                kwargs['options'] = common.RANGEFETCH_OPTIONS
                kwargs['rsa_key'] = __RANGEFETCH_RSA_KEY__
                fetchservers = ['%s://%s.appspot.com%s' % (common.GAE_MODE, x, common.RANGEFETCH_PATH) for x in common.RANGEFETCH_APPIDS]
            return [handler.URLFETCH, fetchservers, common.FETCHMAX_LOCAL, kwargs]
        else:
            # Other verbs (e.g. WebDAV-style) cannot go through urlfetch.
            if common.PHP_ENABLE:
                return PHPProxyHandler.handler_filters[-1].filter(handler)
            else:
                logging.warning('"%s %s" not supported by GAE, please enable PHP mode!', handler.command, handler.host)
                return [handler.DIRECT, {}]
  2371. class WithGAEFilter(GAEFetchFilter):
  2372. """with gae filter"""
  2373. def __init__(self, withgae_sites):
  2374. self.withgae_sites = set(withgae_sites)
  2375. def filter(self, handler):
  2376. if handler.host in self.withgae_sites:
  2377. logging.debug('WithGAEFilter metched %r %r', handler.path, handler.headers)
  2378. return super(WithGAEFilter, self).filter(handler)
  2379. class GAEProxyHandler(AdvancedProxyHandler):
  2380. """GAE Proxy Handler"""
  2381. handler_filters = [GAEFetchFilter()]
  2382. def first_run(self):
  2383. """GAEProxyHandler setup, init domain/iplist map"""
  2384. if not common.PROXY_ENABLE:
  2385. logging.info('resolve common.IPLIST_MAP names=%s to iplist', list(common.IPLIST_MAP))
  2386. common.resolve_iplist()
  2387. random.shuffle(common.GAE_APPIDS)
  2388. for appid in common.GAE_APPIDS:
  2389. host = '%s.appspot.com' % appid
  2390. if host not in common.HOST_MAP:
  2391. common.HOST_MAP[host] = common.HOST_POSTFIX_MAP['.appspot.com']
  2392. if host not in self.dns_cache:
  2393. self.dns_cache[host] = common.IPLIST_MAP[common.HOST_MAP[host]]
  2394. for appid in common.RANGEFETCH_APPIDS:
  2395. host = '%s.appspot.com' % appid
  2396. if host not in common.HOST_MAP:
  2397. common.HOST_MAP[host] = common.HOST_POSTFIX_MAP['.appspot.com']
  2398. if host not in self.dns_cache:
  2399. self.dns_cache[host] = common.IPLIST_MAP[common.HOST_MAP[host]]
  2400. if common.GAE_PAGESPEED:
  2401. for i in xrange(1, 10):
  2402. host = '%d-ps.googleusercontent.com' % i
  2403. if host not in common.HOST_MAP:
  2404. common.HOST_MAP[host] = common.HOST_POSTFIX_MAP['.googleusercontent.com']
  2405. if host not in self.dns_cache:
  2406. self.dns_cache[host] = common.IPLIST_MAP[common.HOST_MAP[host]]
  2407. def gethostbyname2(self, hostname):
  2408. for postfix in ('.appspot.com', '.googleusercontent.com'):
  2409. if hostname.endswith(postfix):
  2410. host = common.HOST_MAP.get(hostname) or common.HOST_POSTFIX_MAP[postfix]
  2411. return common.IPLIST_MAP.get(host) or host.split('|')
  2412. return AdvancedProxyHandler.gethostbyname2(self, hostname)
  2413. def RANGEFETCH(self, response, fetchservers, **kwargs):
  2414. kwargs['password'] = common.RANGEFETCH_PASSWORD
  2415. kwargs['options'] = common.RANGEFETCH_OPTIONS
  2416. kwargs['rsa_key'] = __RANGEFETCH_RSA_KEY__
  2417. fetchservers = ['%s://%s.appspot.com%s' % (common.GAE_MODE, x, common.RANGEFETCH_PATH) for x in common.RANGEFETCH_APPIDS]
  2418. return RangeFetch(self, response, fetchservers, **kwargs).fetch()
  2419. def handle_urlfetch_error(self, fetchserver, response):
  2420. gae_appid = urlparse.urlsplit(fetchserver).hostname.split('.')[-3]
  2421. if response.app_status == 503:
  2422. # appid over qouta, switch to next appid
  2423. if gae_appid == common.GAE_APPIDS[0] and len(common.GAE_APPIDS) > 1:
  2424. common.GAE_APPIDS.append(common.GAE_APPIDS.pop(0))
  2425. logging.info('gae_appid=%r over qouta, switch next appid=%r', gae_appid, common.GAE_APPIDS[0])
  2426. class PHPFetchFilter(BaseProxyHandlerFilter):
  2427. """force https filter"""
  2428. def filter(self, handler):
  2429. if handler.command == 'CONNECT':
  2430. return [handler.STRIP, True, self]
  2431. else:
  2432. kwargs = {}
  2433. if common.PHP_PASSWORD:
  2434. kwargs['password'] = common.PHP_PASSWORD
  2435. if common.PHP_VALIDATE:
  2436. kwargs['validate'] = 1
  2437. return [handler.URLFETCH, [common.PHP_FETCHSERVER], 1, kwargs]
class PHPProxyHandler(AdvancedProxyHandler):
    """PHP Proxy Handler"""
    first_run_lock = threading.Lock()
    handler_filters = [PHPFetchFilter()]
    def first_run(self):
        """One-time setup: resolve the PHP fetchserver and seed dns_cache."""
        if common.PHP_USEHOSTS:
            # Run HostsFilter just before the terminal PHPFetchFilter.
            self.handler_filters.insert(-1, HostsFilter())
        if not common.PROXY_ENABLE:
            common.resolve_iplist()
        fetchhost = urlparse.urlsplit(common.PHP_FETCHSERVER).hostname
        logging.info('resolve common.PHP_FETCHSERVER domain=%r to iplist', fetchhost)
        if common.PHP_USEHOSTS and fetchhost in common.HOST_MAP:
            # Mapped host: expand its iplist entry (or '|'-separated hosts)
            # into one flat list of addresses.
            hostname = common.HOST_MAP[fetchhost]
            fetchhost_iplist = sum([socket.gethostbyname_ex(x)[-1] for x in common.IPLIST_MAP.get(hostname) or hostname.split('|')], [])
        else:
            fetchhost_iplist = self.gethostbyname2(fetchhost)
        if len(fetchhost_iplist) == 0:
            # Hard-fail: nothing to connect to.
            logging.error('resolve %r domain return empty! please use ip list to replace domain list!', fetchhost)
            sys.exit(-1)
        self.dns_cache[fetchhost] = list(set(fetchhost_iplist))
        logging.info('resolve common.PHP_FETCHSERVER domain to iplist=%r', fetchhost_iplist)
        return True
class ProxyChainMixin:
    """proxy chain mixin"""
    def gethostbyname2(self, hostname):
        # Resolve locally; on failure return the hostname itself so the
        # upstream proxy gets the chance to resolve it.
        try:
            return socket.gethostbyname_ex(hostname)[-1]
        except socket.error:
            return [hostname]
    def create_tcp_connection(self, hostname, port, timeout, **kwargs):
        """Open a tunnel to hostname:port through the upstream proxy via
        an HTTP CONNECT request; returns the raw tunneled socket."""
        sock = socket.create_connection((common.PROXY_HOST, int(common.PROXY_PORT)))
        if hostname.endswith('.appspot.com'):
            # Hide the appspot destination from the upstream proxy.
            hostname = 'www.google.com'
        request_data = 'CONNECT %s:%s HTTP/1.1\r\n' % (hostname, port)
        if common.PROXY_USERNAME and common.PROXY_PASSWROD:
            # NOTE(review): 'PASSWROD' looks like a project-wide misspelling
            # of the config attribute name -- kept for consistency; verify
            # against the Common() definition before renaming.
            request_data += 'Proxy-Authorization: Basic %s\r\n' % base64.b64encode(('%s:%s' % (common.PROXY_USERNAME, common.PROXY_PASSWROD)).encode()).decode().strip()
        request_data += '\r\n'
        sock.sendall(request_data)
        # Parse the proxy's reply by grafting an HTTPResponse onto the socket.
        response = httplib.HTTPResponse(sock)
        response.fp.close()
        response.fp = sock.makefile('rb', 0)
        response.begin()
        if response.status >= 400:
            raise httplib.BadStatusLine('%s %s %s' % (response.version, response.status, response.reason))
        return sock
    def create_ssl_connection(self, hostname, port, timeout, **kwargs):
        """Like create_tcp_connection, then wrap the tunnel in TLS."""
        sock = self.create_tcp_connection(hostname, port, timeout, **kwargs)
        ssl_sock = ssl.wrap_socket(sock)
        return ssl_sock
class GreenForwardMixin:
    """green forward mixin"""
    @staticmethod
    def io_copy(dest, source, timeout, bufsize):
        # Pump bytes from source to dest until EOF or timeout, then close
        # both sockets unconditionally.
        try:
            dest.settimeout(timeout)
            source.settimeout(timeout)
            while 1:
                data = source.recv(bufsize)
                if not data:
                    break
                dest.sendall(data)
        except socket.timeout:
            pass
        except NetWorkIOError as e:
            if e.args[0] not in (errno.ECONNABORTED, errno.ECONNRESET, errno.ENOTCONN, errno.EPIPE):
                raise
            # NOTE(review): EBADF is not in the tuple above, so this branch
            # is unreachable (the raise fires first); kept as-is.
            if e.args[0] in (errno.EBADF,):
                return
        finally:
            for sock in (dest, source):
                try:
                    sock.close()
                except StandardError:
                    pass
    def forward_socket(self, local, remote, timeout):
        """forward socket"""
        # One thread copies remote->local on dup'ed fds while this thread
        # copies local->remote; each io_copy closes its own pair.
        bufsize = self.bufsize
        thread.start_new_thread(GreenForwardMixin.io_copy, (remote.dup(), local.dup(), timeout, bufsize))
        GreenForwardMixin.io_copy(local, remote, timeout, bufsize)
# Concrete handler classes combining the optional behaviors: upstream-proxy
# chaining (ProxyChainMixin) and/or thread-based socket forwarding
# (GreenForwardMixin) mixed into the GAE and PHP handlers.
class ProxyChainGAEProxyHandler(ProxyChainMixin, GAEProxyHandler):
    pass
class ProxyChainPHPProxyHandler(ProxyChainMixin, PHPProxyHandler):
    pass
class GreenForwardGAEProxyHandler(GreenForwardMixin, GAEProxyHandler):
    pass
class GreenForwardPHPProxyHandler(GreenForwardMixin, PHPProxyHandler):
    pass
class ProxyChainGreenForwardGAEProxyHandler(ProxyChainMixin, GreenForwardGAEProxyHandler):
    pass
class ProxyChainGreenForwardPHPProxyHandler(ProxyChainMixin, GreenForwardPHPProxyHandler):
    pass
  2529. def get_uptime():
  2530. if os.name == 'nt':
  2531. import ctypes
  2532. try:
  2533. tick = ctypes.windll.kernel32.GetTickCount64()
  2534. except AttributeError:
  2535. tick = ctypes.windll.kernel32.GetTickCount()
  2536. return tick / 1000.0
  2537. elif os.path.isfile('/proc/uptime'):
  2538. with open('/proc/uptime', 'rb') as fp:
  2539. uptime = fp.readline().strip().split()[0].strip()
  2540. return float(uptime)
  2541. elif any(os.path.isfile(os.path.join(x, 'uptime')) for x in os.environ['PATH'].split(os.pathsep)):
  2542. # http://www.opensource.apple.com/source/lldb/lldb-69/test/pexpect-2.4/examples/uptime.py
  2543. pattern = r'up\s+(.*?),\s+([0-9]+) users?,\s+load averages?: ([0-9]+\.[0-9][0-9]),?\s+([0-9]+\.[0-9][0-9]),?\s+([0-9]+\.[0-9][0-9])'
  2544. output = os.popen('uptime').read()
  2545. duration, _, _, _, _ = re.search(pattern, output).groups()
  2546. days, hours, mins = 0, 0, 0
  2547. if 'day' in duration:
  2548. m = re.search(r'([0-9]+)\s+day', duration)
  2549. days = int(m.group(1))
  2550. if ':' in duration:
  2551. m = re.search(r'([0-9]+):([0-9]+)', duration)
  2552. hours = int(m.group(1))
  2553. mins = int(m.group(2))
  2554. if 'min' in duration:
  2555. m = re.search(r'([0-9]+)\s+min', duration)
  2556. mins = int(m.group(1))
  2557. return days * 86400 + hours * 3600 + mins * 60
  2558. else:
  2559. #TODO: support other platforms
  2560. return None
  2561. class PacUtil(object):
  2562. """GoAgent Pac Util"""
  2563. @staticmethod
  2564. def update_pacfile(filename):
  2565. listen_ip = '127.0.0.1'
  2566. autoproxy = '%s:%s' % (listen_ip, common.LISTEN_PORT)
  2567. blackhole = '%s:%s' % (listen_ip, common.PAC_PORT)
  2568. default = 'PROXY %s:%s' % (common.PROXY_HOST, common.PROXY_PORT) if common.PROXY_ENABLE else 'DIRECT'
  2569. opener = urllib2.build_opener(urllib2.ProxyHandler({'http': autoproxy, 'https': autoproxy}))
  2570. content = ''
  2571. need_update = True
  2572. with open(filename, 'rb') as fp:
  2573. content = fp.read()
  2574. try:
  2575. placeholder = '// AUTO-GENERATED RULES, DO NOT MODIFY!'
  2576. content = content[:content.index(placeholder)+len(placeholder)]
  2577. content = re.sub(r'''blackhole\s*=\s*['"]PROXY [\.\w:]+['"]''', 'blackhole = \'PROXY %s\'' % blackhole, content)
  2578. content = re.sub(r'''autoproxy\s*=\s*['"]PROXY [\.\w:]+['"]''', 'autoproxy = \'PROXY %s\'' % autoproxy, content)
  2579. content = re.sub(r'''defaultproxy\s*=\s*['"](DIRECT|PROXY [\.\w:]+)['"]''', 'defaultproxy = \'%s\'' % default, content)
  2580. content = re.sub(r'''host\s*==\s*['"][\.\w:]+['"]\s*\|\|\s*isPlainHostName''', 'host == \'%s\' || isPlainHostName' % listen_ip, content)
  2581. if content.startswith('//'):
  2582. line = '// Proxy Auto-Config file generated by autoproxy2pac, %s\r\n' % time.strftime('%Y-%m-%d %H:%M:%S')
  2583. content = line + '\r\n'.join(content.splitlines()[1:])
  2584. except ValueError:
  2585. need_update = False
  2586. try:
  2587. if common.PAC_ADBLOCK:
  2588. admode = common.PAC_ADMODE
  2589. logging.info('try download %r to update_pacfile(%r)', common.PAC_ADBLOCK, filename)
  2590. adblock_content = opener.open(common.PAC_ADBLOCK).read()
  2591. logging.info('%r downloaded, try convert it with adblock2pac', common.PAC_ADBLOCK)
  2592. if 'gevent' in sys.modules and time.sleep is getattr(sys.modules['gevent'], 'sleep', None) and hasattr(gevent.get_hub(), 'threadpool'):
  2593. jsrule = gevent.get_hub().threadpool.apply_e(Exception, PacUtil.adblock2pac, (adblock_content, 'FindProxyForURLByAdblock', blackhole, default, admode))
  2594. else:
  2595. jsrule = PacUtil.adblock2pac(adblock_content, 'FindProxyForURLByAdblock', blackhole, default, admode)
  2596. content += '\r\n' + jsrule + '\r\n'
  2597. logging.info('%r downloaded and parsed', common.PAC_ADBLOCK)
  2598. else:
  2599. content += '\r\nfunction FindProxyForURLByAdblock(url, host) {return "DIRECT";}\r\n'
  2600. except StandardError as e:
  2601. need_update = False
  2602. logging.exception('update_pacfile failed: %r', e)
  2603. try:
  2604. logging.info('try download %r to update_pacfile(%r)', common.PAC_GFWLIST, filename)
  2605. autoproxy_content = base64.b64decode(opener.open(common.PAC_GFWLIST).read())
  2606. if common.PAC_WHITELIST:
  2607. logging.info('%r downloaded, try convert it with autoproxy2pac_whitelist', common.PAC_GFWLIST)
  2608. if 'gevent' in sys.modules and time.sleep is getattr(sys.modules['gevent'], 'sleep', None) and hasattr(gevent.get_hub(), 'threadpool'):
  2609. jsrule = gevent.get_hub().threadpool.apply_e(Exception, PacUtil.autoproxy2pac_whitelist, (autoproxy_content, 'FindProxyForURLByAutoProxy', autoproxy, default))
  2610. else:
  2611. jsrule = PacUtil.autoproxy2pac_lite(autoproxy_content, 'FindProxyForURLByAutoProxy', autoproxy, default)
  2612. else:
  2613. logging.info('%r downloaded, try convert it with autoproxy2pac_lite', common.PAC_GFWLIST)
  2614. if 'gevent' in sys.modules and time.sleep is getattr(sys.modules['gevent'], 'sleep', None) and hasattr(gevent.get_hub(), 'threadpool'):
  2615. jsrule = gevent.get_hub().threadpool.apply_e(Exception, PacUtil.autoproxy2pac_lite, (autoproxy_content, 'FindProxyForURLByAutoProxy', autoproxy, default))
  2616. else:
  2617. jsrule = PacUtil.autoproxy2pac_lite(autoproxy_content, 'FindProxyForURLByAutoProxy', autoproxy, default)
  2618. content += '\r\n' + jsrule + '\r\n'
  2619. logging.info('%r downloaded and parsed', common.PAC_GFWLIST)
  2620. except StandardError as e:
  2621. need_update = False
  2622. logging.exception('update_pacfile failed: %r', e)
  2623. if need_update:
  2624. with open(filename, 'wb') as fp:
  2625. fp.write(content)
  2626. logging.info('%r successfully updated', filename)
  2627. @staticmethod
  2628. def autoproxy2pac(content, func_name='FindProxyForURLByAutoProxy', proxy='127.0.0.1:8087', default='DIRECT', indent=4):
  2629. """Autoproxy to Pac, based on https://github.com/iamamac/autoproxy2pac"""
  2630. jsLines = []
  2631. for line in content.splitlines()[1:]:
  2632. if line and not line.startswith("!"):
  2633. use_proxy = True
  2634. if line.startswith("@@"):
  2635. line = line[2:]
  2636. use_proxy = False
  2637. return_proxy = 'PROXY %s' % proxy if use_proxy else default
  2638. if line.startswith('/') and line.endswith('/'):
  2639. jsLine = 'if (/%s/i.test(url)) return "%s";' % (line[1:-1], return_proxy)
  2640. elif line.startswith('||'):
  2641. domain = line[2:].lstrip('.')
  2642. if len(jsLines) > 0 and ('host.indexOf(".%s") >= 0' % domain in jsLines[-1] or 'host.indexOf("%s") >= 0' % domain in jsLines[-1]):
  2643. jsLines.pop()
  2644. jsLine = 'if (dnsDomainIs(host, ".%s") || host == "%s") return "%s";' % (domain, domain, return_proxy)
  2645. elif line.startswith('|'):
  2646. jsLine = 'if (url.indexOf("%s") == 0) return "%s";' % (line[1:], return_proxy)
  2647. elif '*' in line:
  2648. jsLine = 'if (shExpMatch(url, "*%s*")) return "%s";' % (line.strip('*'), return_proxy)
  2649. elif '/' not in line:
  2650. jsLine = 'if (host.indexOf("%s") >= 0) return "%s";' % (line, return_proxy)
  2651. else:
  2652. jsLine = 'if (url.indexOf("%s") >= 0) return "%s";' % (line, return_proxy)
  2653. jsLine = ' ' * indent + jsLine
  2654. if use_proxy:
  2655. jsLines.append(jsLine)
  2656. else:
  2657. jsLines.insert(0, jsLine)
  2658. function = 'function %s(url, host) {\r\n%s\r\n%sreturn "%s";\r\n}' % (func_name, '\n'.join(jsLines), ' '*indent, default)
  2659. return function
  2660. @staticmethod
  2661. def autoproxy2pac_whitelist(content, func_name='FindProxyForURLByAutoProxy', proxy='127.0.0.1:8087', default='DIRECT', indent=4):
  2662. """Autoproxy to Pac, based on https://github.com/iamamac/autoproxy2pac"""
  2663. jsLines = []
  2664. for line in content.splitlines()[1:]:
  2665. if line and not line.startswith("!"):
  2666. use_proxy = True
  2667. if line.startswith("@@"):
  2668. line = line[2:]
  2669. use_proxy = False
  2670. return_proxy = 'PROXY %s' % proxy if use_proxy else default
  2671. if line.startswith('/') and line.endswith('/'):
  2672. jsLine = 'if (/%s/i.test(url)) return "%s";' % (line[1:-1], return_proxy)
  2673. elif line.startswith('||'):
  2674. domain = line[2:].lstrip('.')
  2675. if len(jsLines) > 0 and ('host.indexOf(".%s") >= 0' % domain in jsLines[-1] or 'host.indexOf("%s") >= 0' % domain in jsLines[-1]):
  2676. jsLines.pop()
  2677. jsLine = 'if (dnsDomainIs(host, ".%s") || host == "%s") return "%s";' % (domain, domain, return_proxy)
  2678. elif line.startswith('|'):
  2679. jsLine = 'if (url.indexOf("%s") == 0) return "%s";' % (line[1:], return_proxy)
  2680. elif '*' in line:
  2681. jsLine = 'if (shExpMatch(url, "*%s*")) return "%s";' % (line.strip('*'), return_proxy)
  2682. elif '/' not in line:
  2683. jsLine = 'if (host.indexOf("%s") >= 0) return "%s";' % (line, return_proxy)
  2684. else:
  2685. jsLine = 'if (url.indexOf("%s") >= 0) return "%s";' % (line, return_proxy)
  2686. jsLine = ' ' * indent + jsLine
  2687. if use_proxy:
  2688. jsLines.append(jsLine)
  2689. else:
  2690. jsLines.insert(0, jsLine)
  2691. function = 'function %s(url, host) {\r\n%s\r\n%sreturn "%s";\r\n}' % (func_name, '\n'.join(jsLines), ' '*indent, 'PROXY %s' % proxy)
  2692. return function
  2693. @staticmethod
  2694. def autoproxy2pac_lite(content, func_name='FindProxyForURLByAutoProxy', proxy='127.0.0.1:8087', default='DIRECT', indent=4):
  2695. """Autoproxy to Pac, based on https://github.com/iamamac/autoproxy2pac"""
  2696. direct_domain_set = set([])
  2697. proxy_domain_set = set([])
  2698. for line in content.splitlines()[1:]:
  2699. if line and not line.startswith(('!', '|!', '||!')):
  2700. use_proxy = True
  2701. if line.startswith("@@"):
  2702. line = line[2:]
  2703. use_proxy = False
  2704. domain = ''
  2705. if line.startswith('/') and line.endswith('/'):
  2706. line = line[1:-1]
  2707. if line.startswith('^https?:\\/\\/[^\\/]+') and re.match(r'^(\w|\\\-|\\\.)+$', line[18:]):
  2708. domain = line[18:].replace(r'\.', '.')
  2709. else:
  2710. logging.warning('unsupport gfwlist regex: %r', line)
  2711. elif line.startswith('||'):
  2712. domain = line[2:].lstrip('*').rstrip('/')
  2713. elif line.startswith('|'):
  2714. domain = urlparse.urlsplit(line[1:]).hostname.lstrip('*')
  2715. elif line.startswith(('http://', 'https://')):
  2716. domain = urlparse.urlsplit(line).hostname.lstrip('*')
  2717. elif re.search(r'^([\w\-\_\.]+)([\*\/]|$)', line):
  2718. domain = re.split(r'[\*\/]', line)[0]
  2719. else:
  2720. pass
  2721. if '*' in domain:
  2722. domain = domain.split('*')[-1]
  2723. if not domain or re.match(r'^\w+$', domain):
  2724. logging.debug('unsupport gfwlist rule: %r', line)
  2725. continue
  2726. if use_proxy:
  2727. proxy_domain_set.add(domain)
  2728. else:
  2729. direct_domain_set.add(domain)
  2730. proxy_domain_list = sorted(set(x.lstrip('.') for x in proxy_domain_set))
  2731. autoproxy_host = ',\r\n'.join('%s"%s": 1' % (' '*indent, x) for x in proxy_domain_list)
  2732. template = '''\
  2733. var autoproxy_host = {
  2734. %(autoproxy_host)s
  2735. };
  2736. function %(func_name)s(url, host) {
  2737. var lastPos;
  2738. do {
  2739. if (autoproxy_host.hasOwnProperty(host)) {
  2740. return 'PROXY %(proxy)s';
  2741. }
  2742. lastPos = host.indexOf('.') + 1;
  2743. host = host.slice(lastPos);
  2744. } while (lastPos >= 1);
  2745. return '%(default)s';
  2746. }'''
  2747. template = re.sub(r'(?m)^\s{%d}' % min(len(re.search(r' +', x).group()) for x in template.splitlines()), '', template)
  2748. template_args = {'autoproxy_host': autoproxy_host,
  2749. 'func_name': func_name,
  2750. 'proxy': proxy,
  2751. 'default': default}
  2752. return template % template_args
  2753. @staticmethod
  2754. def urlfilter2pac(content, func_name='FindProxyForURLByUrlfilter', proxy='127.0.0.1:8086', default='DIRECT', indent=4):
  2755. """urlfilter.ini to Pac, based on https://github.com/iamamac/autoproxy2pac"""
  2756. jsLines = []
  2757. for line in content[content.index('[exclude]'):].splitlines()[1:]:
  2758. if line and not line.startswith(';'):
  2759. use_proxy = True
  2760. if line.startswith("@@"):
  2761. line = line[2:]
  2762. use_proxy = False
  2763. return_proxy = 'PROXY %s' % proxy if use_proxy else default
  2764. if '*' in line:
  2765. jsLine = 'if (shExpMatch(url, "%s")) return "%s";' % (line, return_proxy)
  2766. else:
  2767. jsLine = 'if (url == "%s") return "%s";' % (line, return_proxy)
  2768. jsLine = ' ' * indent + jsLine
  2769. if use_proxy:
  2770. jsLines.append(jsLine)
  2771. else:
  2772. jsLines.insert(0, jsLine)
  2773. function = 'function %s(url, host) {\r\n%s\r\n%sreturn "%s";\r\n}' % (func_name, '\n'.join(jsLines), ' '*indent, default)
  2774. return function
  2775. @staticmethod
  2776. def adblock2pac(content, func_name='FindProxyForURLByAdblock', proxy='127.0.0.1:8086', default='DIRECT', admode=1, indent=4):
  2777. """adblock list to Pac, based on https://github.com/iamamac/autoproxy2pac"""
  2778. white_conditions = {'host': [], 'url.indexOf': [], 'shExpMatch': []}
  2779. black_conditions = {'host': [], 'url.indexOf': [], 'shExpMatch': []}
  2780. for line in content.splitlines()[1:]:
  2781. if not line or line.startswith('!') or '##' in line or '#@#' in line:
  2782. continue
  2783. use_proxy = True
  2784. use_start = False
  2785. use_end = False
  2786. use_domain = False
  2787. use_postfix = []
  2788. if '$' in line:
  2789. posfixs = line.split('$')[-1].split(',')
  2790. if any('domain' in x for x in posfixs):
  2791. continue
  2792. if 'image' in posfixs:
  2793. use_postfix += ['.jpg', '.gif']
  2794. elif 'script' in posfixs:
  2795. use_postfix += ['.js']
  2796. else:
  2797. continue
  2798. line = line.split('$')[0]
  2799. if line.startswith("@@"):
  2800. line = line[2:]
  2801. use_proxy = False
  2802. if '||' == line[:2]:
  2803. line = line[2:]
  2804. if '/' not in line:
  2805. use_domain = True
  2806. else:
  2807. use_start = True
  2808. elif '|' == line[0]:
  2809. line = line[1:]
  2810. use_start = True
  2811. if line[-1] in ('^', '|'):
  2812. line = line[:-1]
  2813. if not use_postfix:
  2814. use_end = True
  2815. line = line.replace('^', '*').strip('*')
  2816. conditions = black_conditions if use_proxy else white_conditions
  2817. if use_start and use_end:
  2818. conditions['shExpMatch'] += ['*%s*' % line]
  2819. elif use_start:
  2820. if '*' in line:
  2821. if use_postfix:
  2822. conditions['shExpMatch'] += ['*%s*%s' % (line, x) for x in use_postfix]
  2823. else:
  2824. conditions['shExpMatch'] += ['*%s*' % line]
  2825. else:
  2826. conditions['url.indexOf'] += [line]
  2827. elif use_domain and use_end:
  2828. if '*' in line:
  2829. conditions['shExpMatch'] += ['%s*' % line]
  2830. else:
  2831. conditions['host'] += [line]
  2832. elif use_domain:
  2833. if line.split('/')[0].count('.') <= 1:
  2834. if use_postfix:
  2835. conditions['shExpMatch'] += ['*.%s*%s' % (line, x) for x in use_postfix]
  2836. else:
  2837. conditions['shExpMatch'] += ['*.%s*' % line]
  2838. else:
  2839. if '*' in line:
  2840. if use_postfix:
  2841. conditions['shExpMatch'] += ['*%s*%s' % (line, x) for x in use_postfix]
  2842. else:
  2843. conditions['shExpMatch'] += ['*%s*' % line]
  2844. else:
  2845. if use_postfix:
  2846. conditions['shExpMatch'] += ['*%s*%s' % (line, x) for x in use_postfix]
  2847. else:
  2848. conditions['url.indexOf'] += ['http://%s' % line]
  2849. else:
  2850. if use_postfix:
  2851. conditions['shExpMatch'] += ['*%s*%s' % (line, x) for x in use_postfix]
  2852. else:
  2853. conditions['shExpMatch'] += ['*%s*' % line]
  2854. templates = ['''\
  2855. function %(func_name)s(url, host) {
  2856. return '%(default)s';
  2857. }''',
  2858. '''\
  2859. var blackhole_host = {
  2860. %(blackhole_host)s
  2861. };
  2862. function %(func_name)s(url, host) {
  2863. // untrusted ablock plus list, disable whitelist until chinalist come back.
  2864. if (blackhole_host.hasOwnProperty(host)) {
  2865. return 'PROXY %(proxy)s';
  2866. }
  2867. return '%(default)s';
  2868. }''',
  2869. '''\
  2870. var blackhole_host = {
  2871. %(blackhole_host)s
  2872. };
  2873. var blackhole_url_indexOf = [
  2874. %(blackhole_url_indexOf)s
  2875. ];
  2876. function %s(url, host) {
  2877. // untrusted ablock plus list, disable whitelist until chinalist come back.
  2878. if (blackhole_host.hasOwnProperty(host)) {
  2879. return 'PROXY %(proxy)s';
  2880. }
  2881. for (i = 0; i < blackhole_url_indexOf.length; i++) {
  2882. if (url.indexOf(blackhole_url_indexOf[i]) >= 0) {
  2883. return 'PROXY %(proxy)s';
  2884. }
  2885. }
  2886. return '%(default)s';
  2887. }''',
  2888. '''\
  2889. var blackhole_host = {
  2890. %(blackhole_host)s
  2891. };
  2892. var blackhole_url_indexOf = [
  2893. %(blackhole_url_indexOf)s
  2894. ];
  2895. var blackhole_shExpMatch = [
  2896. %(blackhole_shExpMatch)s
  2897. ];
  2898. function %(func_name)s(url, host) {
  2899. // untrusted ablock plus list, disable whitelist until chinalist come back.
  2900. if (blackhole_host.hasOwnProperty(host)) {
  2901. return 'PROXY %(proxy)s';
  2902. }
  2903. for (i = 0; i < blackhole_url_indexOf.length; i++) {
  2904. if (url.indexOf(blackhole_url_indexOf[i]) >= 0) {
  2905. return 'PROXY %(proxy)s';
  2906. }
  2907. }
  2908. for (i = 0; i < blackhole_shExpMatch.length; i++) {
  2909. if (shExpMatch(url, blackhole_shExpMatch[i])) {
  2910. return 'PROXY %(proxy)s';
  2911. }
  2912. }
  2913. return '%(default)s';
  2914. }''']
  2915. template = re.sub(r'(?m)^\s{%d}' % min(len(re.search(r' +', x).group()) for x in templates[admode].splitlines()), '', templates[admode])
  2916. template_kwargs = {'blackhole_host': ',\r\n'.join("%s'%s': 1" % (' '*indent, x) for x in sorted(black_conditions['host'])),
  2917. 'blackhole_url_indexOf': ',\r\n'.join("%s'%s'" % (' '*indent, x) for x in sorted(black_conditions['url.indexOf'])),
  2918. 'blackhole_shExpMatch': ',\r\n'.join("%s'%s'" % (' '*indent, x) for x in sorted(black_conditions['shExpMatch'])),
  2919. 'func_name': func_name,
  2920. 'proxy': proxy,
  2921. 'default': default}
  2922. return template % template_kwargs
class PacFileFilter(BaseProxyHandlerFilter):
    """Serve the local PAC file for GET requests targeting it, refreshing it
    in the background when stale.  Other requests fall through (returns None)."""
    def filter(self, handler):
        # only loopback clients are trusted for management actions (?flush)
        is_local_client = handler.client_address[0] in ('127.0.0.1', '::1')
        # the PAC file lives next to this script
        pacfile = os.path.join(os.path.dirname(os.path.abspath(__file__)), common.PAC_FILE)
        urlparts = urlparse.urlsplit(handler.path)
        if handler.command == 'GET' and urlparts.path.lstrip('/') == common.PAC_FILE:
            if urlparts.query == 'flush':
                # '?flush' forces a background regeneration of the PAC file
                if is_local_client:
                    thread.start_new_thread(PacUtil.update_pacfile, (pacfile,))
                else:
                    return [handler.MOCK, 403, {'Content-Type': 'text/plain'}, 'client address %r not allowed' % handler.client_address[0]]
            if time.time() - os.path.getmtime(pacfile) > common.PAC_EXPIRED:
                # check system uptime > 30 minutes
                uptime = get_uptime()
                if uptime and uptime > 1800:
                    # touch the mtime first so concurrent requests do not all
                    # kick off another update, then regenerate in the background
                    thread.start_new_thread(lambda: os.utime(pacfile, (time.time(), time.time())) or PacUtil.update_pacfile(pacfile), tuple())
            with open(pacfile, 'rb') as fp:
                content = fp.read()
            if not is_local_client:
                # remote clients need a reachable address instead of loopback
                serving_addr = urlparts.hostname or ProxyUtil.get_listen_ip()
                content = content.replace('127.0.0.1', serving_addr)
            headers = {'Content-Type': 'text/plain'}
            if 'gzip' in handler.headers.get('Accept-Encoding', ''):
                headers['Content-Encoding'] = 'gzip'
                # hand-rolled gzip member (RFC 1952): 10-byte header, raw
                # deflate stream (negative wbits), then CRC32 + ISIZE trailer
                compressobj = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -zlib.MAX_WBITS, zlib.DEF_MEM_LEVEL, 0)
                dataio = io.BytesIO()
                dataio.write('\x1f\x8b\x08\x00\x00\x00\x00\x00\x02\xff')
                dataio.write(compressobj.compress(content))
                dataio.write(compressobj.flush())
                # mask to 32 bits: zlib.crc32 can return negative ints on Python 2
                dataio.write(struct.pack('<LL', zlib.crc32(content) & 0xFFFFFFFFL, len(content) & 0xFFFFFFFFL))
                content = dataio.getvalue()
            return [handler.MOCK, 200, headers, content]
class StaticFileFilter(BaseProxyHandlerFilter):
    """Serve files from the working directory; directories without an
    index.html get a generated HTML listing.  Non-file paths fall through."""
    # file served in place of a directory listing when present
    index_file = 'index.html'
    def format_index_html(self, dirname):
        """Return an HTML directory-listing page for *dirname* as unicode."""
        INDEX_TEMPLATE = u'''
        <html>
        <title>Directory listing for $dirname</title>
        <body>
        <h2>Directory listing for $dirname</h2>
        <hr>
        <ul>
        $html
        </ul>
        <hr>
        </body></html>
        '''
        html = ''
        if not isinstance(dirname, unicode):
            dirname = dirname.decode(sys.getfilesystemencoding())
        for name in os.listdir(dirname):
            fullname = os.path.join(dirname, name)
            # trailing slash marks sub-directories in the listing
            suffix = u'/' if os.path.isdir(fullname) else u''
            html += u'<li><a href="%s%s">%s%s</a>\r\n' % (name, suffix, name, suffix)
        return string.Template(INDEX_TEMPLATE).substitute(dirname=dirname, html=html)
    def filter(self, handler):
        path = urlparse.urlsplit(handler.path).path
        if path.startswith('/'):
            # resolve relative to the current working directory; '/' maps to '.'
            path = urllib.unquote_plus(path.lstrip('/') or '.').decode('utf8')
            if os.path.isdir(path):
                index_file = os.path.join(path, self.index_file)
                if not os.path.isfile(index_file):
                    content = self.format_index_html(path).encode('UTF-8')
                    headers = {'Content-Type': 'text/html; charset=utf-8', 'Connection': 'close'}
                    return [handler.MOCK, 200, headers, content]
                else:
                    path = index_file
            if os.path.isfile(path):
                content_type = 'application/octet-stream'
                try:
                    import mimetypes
                    # NOTE(review): an unknown extension yields None here,
                    # silently overriding the octet-stream fallback — confirm intended
                    content_type = mimetypes.types_map.get(os.path.splitext(path)[1])
                except StandardError as e:
                    logging.error('import mimetypes failed: %r', e)
                with open(path, 'rb') as fp:
                    content = fp.read()
                headers = {'Connection': 'close', 'Content-Type': content_type}
                return [handler.MOCK, 200, headers, content]
  3003. class BlackholeFilter(BaseProxyHandlerFilter):
  3004. """blackhole filter"""
  3005. one_pixel_gif = 'GIF89a\x01\x00\x01\x00\x80\xff\x00\xc0\xc0\xc0\x00\x00\x00!\xf9\x04\x01\x00\x00\x00\x00,\x00\x00\x00\x00\x01\x00\x01\x00\x00\x02\x02D\x01\x00;'
  3006. def filter(self, handler):
  3007. if handler.command == 'CONNECT':
  3008. return [handler.STRIP, True, self]
  3009. elif handler.path.startswith(('http://', 'https://')):
  3010. headers = {'Cache-Control': 'max-age=86400',
  3011. 'Expires': 'Oct, 01 Aug 2100 00:00:00 GMT',
  3012. 'Connection': 'close'}
  3013. content = ''
  3014. if urlparse.urlsplit(handler.path).path.lower().endswith(('.jpg', '.gif', '.png','.jpeg', '.bmp')):
  3015. headers['Content-Type'] = 'image/gif'
  3016. content = self.one_pixel_gif
  3017. return [handler.MOCK, 200, headers, content]
  3018. else:
  3019. return [handler.MOCK, 404, {'Connection': 'close'}, '']
class PACProxyHandler(SimpleProxyHandler):
    """Handler bound to the PAC port: serves the PAC file and local static
    files, and blackholes any other traffic sent there."""
    # filters are tried in order; BlackholeFilter is the catch-all
    handler_filters = [PacFileFilter(), StaticFileFilter(), BlackholeFilter()]
  3023. def get_process_list():
  3024. import ctypes
  3025. import collections
  3026. Process = collections.namedtuple('Process', 'pid name exe')
  3027. process_list = []
  3028. if os.name == 'nt':
  3029. PROCESS_QUERY_INFORMATION = 0x0400
  3030. PROCESS_VM_READ = 0x0010
  3031. lpidProcess = (ctypes.c_ulong * 1024)()
  3032. cb = ctypes.sizeof(lpidProcess)
  3033. cbNeeded = ctypes.c_ulong()
  3034. ctypes.windll.psapi.EnumProcesses(ctypes.byref(lpidProcess), cb, ctypes.byref(cbNeeded))
  3035. nReturned = cbNeeded.value/ctypes.sizeof(ctypes.c_ulong())
  3036. pidProcess = [i for i in lpidProcess][:nReturned]
  3037. has_queryimage = hasattr(ctypes.windll.kernel32, 'QueryFullProcessImageNameA')
  3038. for pid in pidProcess:
  3039. hProcess = ctypes.windll.kernel32.OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, 0, pid)
  3040. if hProcess:
  3041. modname = ctypes.create_string_buffer(2048)
  3042. count = ctypes.c_ulong(ctypes.sizeof(modname))
  3043. if has_queryimage:
  3044. ctypes.windll.kernel32.QueryFullProcessImageNameA(hProcess, 0, ctypes.byref(modname), ctypes.byref(count))
  3045. else:
  3046. ctypes.windll.psapi.GetModuleFileNameExA(hProcess, 0, ctypes.byref(modname), ctypes.byref(count))
  3047. exe = modname.value
  3048. name = os.path.basename(exe)
  3049. process_list.append(Process(pid=pid, name=name, exe=exe))
  3050. ctypes.windll.kernel32.CloseHandle(hProcess)
  3051. elif sys.platform.startswith('linux'):
  3052. for filename in glob.glob('/proc/[0-9]*/cmdline'):
  3053. pid = int(filename.split('/')[2])
  3054. exe_link = '/proc/%d/exe' % pid
  3055. if os.path.exists(exe_link):
  3056. exe = os.readlink(exe_link)
  3057. name = os.path.basename(exe)
  3058. process_list.append(Process(pid=pid, name=name, exe=exe))
  3059. else:
  3060. try:
  3061. import psutil
  3062. process_list = psutil.get_process_list()
  3063. except StandardError as e:
  3064. logging.exception('psutil.get_process_list() failed: %r', e)
  3065. return process_list
def pre_start():
    """One-time startup checks and configuration: warn about missing optional
    dependencies, tune OS limits/console, push [gae]/[dns]/[autorange] config
    into the handler classes and install the request filters in priority order.
    May call sys.exit(-1) on fatal misconfiguration."""
    if not OpenSSL:
        logging.warning('python-openssl not found, please install it!')
    if sys.platform == 'cygwin':
        logging.info('cygwin is not officially supported, please continue at your own risk :)')
        #sys.exit(-1)
    elif os.name == 'posix':
        # raise the fd limit: a proxy keeps many sockets open at once
        try:
            import resource
            resource.setrlimit(resource.RLIMIT_NOFILE, (8192, -1))
        except ValueError:
            pass  # hard limit below 8192; keep the system default
    elif os.name == 'nt':
        import ctypes
        ctypes.windll.kernel32.SetConsoleTitleW(u'GoAgent v%s' % __version__)
        if not common.LISTEN_VISIBLE:
            ctypes.windll.user32.ShowWindow(ctypes.windll.kernel32.GetConsoleWindow(), 0)
        else:
            ctypes.windll.user32.ShowWindow(ctypes.windll.kernel32.GetConsoleWindow(), 1)
        if common.LOVE_ENABLE and random.randint(1, 100) <= 5:
            # 5% of launches: append a random tip to the console title
            title = ctypes.create_unicode_buffer(1024)
            ctypes.windll.kernel32.GetConsoleTitleW(ctypes.byref(title), len(title)-1)
            ctypes.windll.kernel32.SetConsoleTitleW('%s %s' % (title.value, random.choice(common.LOVE_TIP)))
        # pop a warning when known-conflicting security software is running;
        # every entry is currently disabled (False), so this is dormant
        blacklist = {'360safe': False,
                     'QQProtect': False, }
        softwares = [k for k, v in blacklist.items() if v]
        if softwares:
            tasklist = '\n'.join(x.name for x in get_process_list()).lower()
            softwares = [x for x in softwares if x.lower() in tasklist]
            if softwares:
                title = u'GoAgent 建议'
                error = u'某些安全软件(如 %s)可能和本软件存在冲突,造成 CPU 占用过高。\n如有此现象建议暂时退出此安全软件来继续运行GoAgent' % ','.join(softwares)
                ctypes.windll.user32.MessageBoxW(None, error, title, 0)
                #sys.exit(0)
    if os.path.isfile('/proc/cpuinfo'):
        # slow CPU (BogoMIPS / cpu MHz below 1000): suggest a lighter config
        with open('/proc/cpuinfo', 'rb') as fp:
            m = re.search(r'(?im)(BogoMIPS|cpu MHz)\s+:\s+([\d\.]+)', fp.read())
            if m and float(m.group(2)) < 1000:
                logging.warning("*NOTE*, Please set [gae]window=2 [gae]keepalive=1")
    if GAEProxyHandler.max_window != common.GAE_WINDOW:
        GAEProxyHandler.max_window = common.GAE_WINDOW
    if common.GAE_KEEPALIVE and common.GAE_MODE == 'https':
        GAEProxyHandler.ssl_connection_keepalive = True
    if common.GAE_PAGESPEED and not common.GAE_OBFUSCATE:
        logging.critical("*NOTE*, [gae]pagespeed=1 requires [gae]obfuscate=1")
        sys.exit(-1)
    if common.GAE_SSLVERSION:
        GAEProxyHandler.ssl_version = getattr(ssl, 'PROTOCOL_%s' % common.GAE_SSLVERSION)
        GAEProxyHandler.openssl_context = SSLConnection.context_builder(common.GAE_SSLVERSION)
    if common.GAE_APPIDS[0] == 'goagent':
        # the shipped placeholder appid was never replaced by the user
        logging.critical('please edit %s to add your appid to [gae] !', common.CONFIG_FILENAME)
        sys.exit(-1)
    if common.GAE_MODE == 'http' and common.GAE_PASSWORD == '':
        logging.critical('to enable http mode, you should set %r [gae]password = <your_pass> and [gae]options = rc4', common.CONFIG_FILENAME)
        sys.exit(-1)
    if common.GAE_TRANSPORT:
        GAEProxyHandler.disable_transport_ssl = False
    if common.GAE_REGIONS and not pygeoip:
        logging.critical('to enable [gae]regions mode, you should install pygeoip')
        sys.exit(-1)
    if common.PAC_ENABLE:
        pac_ip = ProxyUtil.get_listen_ip() if common.PAC_IP in ('', '::', '0.0.0.0') else common.PAC_IP
        url = 'http://%s:%d/%s' % (pac_ip, common.PAC_PORT, common.PAC_FILE)
        # warm the PAC url 10 minutes after startup, bypassing any system proxy
        spawn_later(600, urllib2.build_opener(urllib2.ProxyHandler({})).open, url)
    if not dnslib:
        logging.error('dnslib not found, please put dnslib-0.8.3.egg to %r!', os.path.dirname(os.path.abspath(__file__)))
        sys.exit(-1)
    if not common.DNS_ENABLE:
        if not common.HTTP_DNS:
            common.HTTP_DNS = common.DNS_SERVERS[:]
        # prepend system-configured dns servers that are not already listed
        for dnsservers_ref in (common.HTTP_DNS, common.DNS_SERVERS):
            any(dnsservers_ref.insert(0, x) for x in [y for y in get_dnsserver_list() if y not in dnsservers_ref])
        AdvancedProxyHandler.dns_servers = common.HTTP_DNS
        AdvancedProxyHandler.dns_blacklist = common.DNS_BLACKLIST
    else:
        AdvancedProxyHandler.dns_servers = common.HTTP_DNS or common.DNS_SERVERS
        AdvancedProxyHandler.dns_blacklist = common.DNS_BLACKLIST
    RangeFetch.threads = common.AUTORANGE_THREADS
    RangeFetch.maxsize = common.AUTORANGE_MAXSIZE
    RangeFetch.bufsize = common.AUTORANGE_BUFSIZE
    RangeFetch.waitsize = common.AUTORANGE_WAITSIZE
    # filters are inserted at index 0, so the LAST insert below runs FIRST
    if True:
        GAEProxyHandler.handler_filters.insert(0, AutoRangeFilter())
    if common.GAE_REGIONS:
        GAEProxyHandler.handler_filters.insert(0, DirectRegionFilter(common.GAE_REGIONS))
    if True:
        GAEProxyHandler.handler_filters.insert(0, HostsFilter())
    if True:
        GAEProxyHandler.handler_filters.insert(0, URLRewriteFilter())
    if common.HTTP_FAKEHTTPS:
        GAEProxyHandler.handler_filters.insert(0, FakeHttpsFilter(common.HTTP_FAKEHTTPS, common.HTTP_NOFAKEHTTPS))
        PHPProxyHandler.handler_filters.insert(0, FakeHttpsFilter(common.HTTP_FAKEHTTPS, common.HTTP_NOFAKEHTTPS))
    if common.HTTP_FORCEHTTPS:
        GAEProxyHandler.handler_filters.insert(0, ForceHttpsFilter(common.HTTP_FORCEHTTPS, common.HTTP_NOFORCEHTTPS))
        PHPProxyHandler.handler_filters.insert(0, ForceHttpsFilter(common.HTTP_FORCEHTTPS, common.HTTP_NOFORCEHTTPS))
    if common.HTTP_WITHGAE:
        GAEProxyHandler.handler_filters.insert(0, WithGAEFilter(common.HTTP_WITHGAE))
    if common.USERAGENT_ENABLE:
        GAEProxyHandler.handler_filters.insert(0, UserAgentFilter(common.USERAGENT_STRING))
        PHPProxyHandler.handler_filters.insert(0, UserAgentFilter(common.USERAGENT_STRING))
    if common.LISTEN_USERNAME:
        # authentication must run before all other filters, hence inserted last
        GAEProxyHandler.handler_filters.insert(0, AuthFilter(common.LISTEN_USERNAME, common.LISTEN_PASSWORD))
def main():
    """Program entry point: normalize the working directory, configure
    logging, run pre-start checks, then launch the optional PHP/PAC/DNS
    servers on background threads and serve the GAE proxy in the foreground."""
    global __file__
    # resolve symlinks so data files are found next to the real script
    __file__ = os.path.abspath(__file__)
    if os.path.islink(__file__):
        __file__ = getattr(os, 'readlink', lambda x: x)(__file__)
    os.chdir(os.path.dirname(os.path.abspath(__file__)))
    logging.basicConfig(level=logging.DEBUG if common.LISTEN_DEBUGINFO else logging.INFO, format='%(levelname)s - %(asctime)s %(message)s', datefmt='[%b %d %H:%M:%S]')
    pre_start()
    CertUtil.check_ca()
    sys.stderr.write(common.info())
    # detect the uvent/libuv gevent loop; it selects the 'GreenForward'
    # handler variants below
    uvent_enabled = 'uvent.loop' in sys.modules and isinstance(gevent.get_hub().loop, __import__('uvent').loop.UVLoop)
    if common.PHP_ENABLE:
        host, port = common.PHP_LISTEN.split(':')
        # pick (plain, green-forward) pair, chained variants behind an upstream proxy
        HandlerClass = ((PHPProxyHandler, GreenForwardPHPProxyHandler) if not common.PROXY_ENABLE else (ProxyChainPHPProxyHandler, ProxyChainGreenForwardPHPProxyHandler))[uvent_enabled]
        server = LocalProxyServer((host, int(port)), HandlerClass)
        thread.start_new_thread(server.serve_forever, tuple())
    if common.PAC_ENABLE:
        server = LocalProxyServer((common.PAC_IP, common.PAC_PORT), PACProxyHandler)
        thread.start_new_thread(server.serve_forever, tuple())
    if common.DNS_ENABLE:
        try:
            # dnsproxy.py sits beside this script; make it importable
            sys.path += ['.']
            from dnsproxy import DNSServer
            host, port = common.DNS_LISTEN.split(':')
            server = DNSServer((host, int(port)), dns_servers=common.DNS_SERVERS, dns_blacklist=common.DNS_BLACKLIST, dns_tcpover=common.DNS_TCPOVER)
            thread.start_new_thread(server.serve_forever, tuple())
        except ImportError:
            logging.exception('GoAgent DNSServer requires dnslib and gevent 1.0')
            sys.exit(-1)
    # main GAE proxy runs in the foreground and blocks here
    HandlerClass = ((GAEProxyHandler, GreenForwardGAEProxyHandler) if not common.PROXY_ENABLE else (ProxyChainGAEProxyHandler, ProxyChainGreenForwardGAEProxyHandler))[uvent_enabled]
    server = LocalProxyServer((common.LISTEN_IP, common.LISTEN_PORT), HandlerClass)
    try:
        server.serve_forever()
    except SystemError as e:
        # libev select() breakage on Windows: advise the uvent launcher
        if '(libev) select: ' in repr(e):
            logging.error('PLEASE START GOAGENT BY uvent.bat')
            sys.exit(-1)
# script entry point
if __name__ == '__main__':
    main()