PageRenderTime 29ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/simpleproxy.py

https://github.com/greatagent3/esr
Python | 1799 lines | 1750 code | 42 blank | 7 comment | 64 complexity | 33bea6f419e09bed55ae146a4e7d03d1 MD5 | raw file
  1. #!/usr/bin/env python
  2. # coding:utf-8
  3. # Based on GoAgent 3.1.16 by Phus Lu <phus.lu@gmail.com>
  4. # Author: Wang Wei Qiang <wwqgtxx@gmail.com>
  5. __version__ = '3.0.0'
  6. import sys
  7. import os
  8. import glob
  9. reload(sys).setdefaultencoding('UTF-8')
  10. sys.dont_write_bytecode = True
  11. sys.path += glob.glob('%s/*.egg' % os.path.dirname(os.path.abspath(__file__)))
  12. sys.path += glob.glob('%s/lib/*.egg' % os.path.dirname(os.path.abspath(__file__)))
  13. try:
  14. import gevent
  15. import gevent.socket
  16. import gevent.server
  17. import gevent.queue
  18. import gevent.monkey
  19. gevent.monkey.patch_all(subprocess=True)
  20. except ImportError:
  21. gevent = None
  22. except TypeError:
  23. gevent.monkey.patch_all()
  24. sys.stderr.write('\033[31m Warning: Please update gevent to the latest 1.0 version!\033[0m\n')
  25. import errno
  26. import time
  27. import struct
  28. import collections
  29. import binascii
  30. import zlib
  31. import itertools
  32. import re
  33. import io
  34. import fnmatch
  35. import traceback
  36. import random
  37. import base64
  38. import string
  39. import hashlib
  40. import threading
  41. import thread
  42. import socket
  43. import ssl
  44. import select
  45. import Queue
  46. import SocketServer
  47. import ConfigParser
  48. import BaseHTTPServer
  49. import httplib
  50. import urllib
  51. import urllib2
  52. import urlparse
  53. try:
  54. import dnslib
  55. except ImportError:
  56. dnslib = None
  57. try:
  58. import OpenSSL
  59. except ImportError:
  60. OpenSSL = None
  61. try:
  62. import pygeoip
  63. except ImportError:
  64. pygeoip = None
  65. HAS_PYPY = hasattr(sys, 'pypy_version_info')
  66. NetWorkIOError = (socket.error, ssl.SSLError, OSError) if not OpenSSL else (socket.error, ssl.SSLError, OpenSSL.SSL.Error, OSError)
  67. class Logging(type(sys)):
  68. CRITICAL = 50
  69. FATAL = CRITICAL
  70. ERROR = 40
  71. WARNING = 30
  72. WARN = WARNING
  73. INFO = 20
  74. DEBUG = 10
  75. NOTSET = 0
  76. def __init__(self, *args, **kwargs):
  77. self.level = self.__class__.INFO
  78. self.__set_error_color = lambda: None
  79. self.__set_warning_color = lambda: None
  80. self.__set_debug_color = lambda: None
  81. self.__reset_color = lambda: None
  82. if hasattr(sys.stderr, 'isatty') and sys.stderr.isatty():
  83. if os.name == 'nt':
  84. import ctypes
  85. SetConsoleTextAttribute = ctypes.windll.kernel32.SetConsoleTextAttribute
  86. GetStdHandle = ctypes.windll.kernel32.GetStdHandle
  87. self.__set_error_color = lambda: SetConsoleTextAttribute(GetStdHandle(-11), 0x04)
  88. self.__set_warning_color = lambda: SetConsoleTextAttribute(GetStdHandle(-11), 0x06)
  89. self.__set_debug_color = lambda: SetConsoleTextAttribute(GetStdHandle(-11), 0x002)
  90. self.__reset_color = lambda: SetConsoleTextAttribute(GetStdHandle(-11), 0x07)
  91. elif os.name == 'posix':
  92. self.__set_error_color = lambda: sys.stderr.write('\033[31m')
  93. self.__set_warning_color = lambda: sys.stderr.write('\033[33m')
  94. self.__set_debug_color = lambda: sys.stderr.write('\033[32m')
  95. self.__reset_color = lambda: sys.stderr.write('\033[0m')
  96. @classmethod
  97. def getLogger(cls, *args, **kwargs):
  98. return cls(*args, **kwargs)
  99. def basicConfig(self, *args, **kwargs):
  100. self.level = int(kwargs.get('level', self.__class__.INFO))
  101. if self.level > self.__class__.DEBUG:
  102. self.debug = self.dummy
  103. def log(self, level, fmt, *args, **kwargs):
  104. sys.stderr.write('%s - [%s] %s\n' % (level, time.ctime()[4:-5], fmt % args))
  105. def dummy(self, *args, **kwargs):
  106. pass
  107. def debug(self, fmt, *args, **kwargs):
  108. pass
  109. def info(self, fmt, *args, **kwargs):
  110. pass
  111. def warning(self, fmt, *args, **kwargs):
  112. self.__set_warning_color()
  113. self.log('WARNING', fmt, *args, **kwargs)
  114. self.__reset_color()
  115. def warn(self, fmt, *args, **kwargs):
  116. self.warning(fmt, *args, **kwargs)
  117. def error(self, fmt, *args, **kwargs):
  118. self.__set_error_color()
  119. self.log('ERROR', fmt, *args, **kwargs)
  120. self.__reset_color()
  121. def exception(self, fmt, *args, **kwargs):
  122. self.error(fmt, *args, **kwargs)
  123. sys.stderr.write(traceback.format_exc() + '\n')
  124. def critical(self, fmt, *args, **kwargs):
  125. self.__set_error_color()
  126. self.log('CRITICAL', fmt, *args, **kwargs)
  127. self.__reset_color()
# Create the lightweight logger and register it in sys.modules under the name
# 'logging', so the module-level name ``logging`` used throughout this file
# refers to the Logging instance above.
logging = sys.modules['logging'] = Logging('logging')
  129. class LRUCache(object):
  130. """http://pypi.python.org/pypi/lru/"""
  131. def __init__(self, max_items=100):
  132. self.cache = {}
  133. self.key_order = []
  134. self.max_items = max_items
  135. def __setitem__(self, key, value):
  136. self.cache[key] = value
  137. self._mark(key)
  138. def __getitem__(self, key):
  139. value = self.cache[key]
  140. self._mark(key)
  141. return value
  142. def __contains__(self, key):
  143. return key in self.cache
  144. def _mark(self, key):
  145. if key in self.key_order:
  146. self.key_order.remove(key)
  147. self.key_order.insert(0, key)
  148. if len(self.key_order) > self.max_items:
  149. index = self.max_items // 2
  150. delitem = self.cache.__delitem__
  151. key_order = self.key_order
  152. any(delitem(key_order[x]) for x in xrange(index, len(key_order)))
  153. self.key_order = self.key_order[:index]
  154. def clear(self):
  155. self.cache = {}
  156. self.key_order = []
  157. class CertUtil(object):
  158. """CertUtil module, based on mitmproxy"""
  159. ca_vendor = 'GoAgent'
  160. ca_certdir = 'certs'
  161. ca_lock = threading.Lock()
  162. @staticmethod
  163. def create_ca():
  164. key = OpenSSL.crypto.PKey()
  165. key.generate_key(OpenSSL.crypto.TYPE_RSA, 2048)
  166. ca = OpenSSL.crypto.X509()
  167. ca.set_serial_number(0)
  168. ca.set_version(2)
  169. subj = ca.get_subject()
  170. subj.countryName = 'CN'
  171. subj.stateOrProvinceName = 'Internet'
  172. subj.localityName = 'Cernet'
  173. subj.organizationName = CertUtil.ca_vendor
  174. subj.organizationalUnitName = '%s Root' % CertUtil.ca_vendor
  175. subj.commonName = '%s CA' % CertUtil.ca_vendor
  176. ca.gmtime_adj_notBefore(0)
  177. ca.gmtime_adj_notAfter(24 * 60 * 60 * 3652)
  178. ca.set_issuer(ca.get_subject())
  179. ca.set_pubkey(key)
  180. ca.add_extensions([
  181. OpenSSL.crypto.X509Extension(b'basicConstraints', True, b'CA:TRUE'),
  182. OpenSSL.crypto.X509Extension(b'nsCertType', True, b'sslCA'),
  183. OpenSSL.crypto.X509Extension(b'extendedKeyUsage', True, b'serverAuth,clientAuth,emailProtection,timeStamping,msCodeInd,msCodeCom,msCTLSign,msSGC,msEFS,nsSGC'),
  184. OpenSSL.crypto.X509Extension(b'keyUsage', False, b'keyCertSign, cRLSign'),
  185. OpenSSL.crypto.X509Extension(b'subjectKeyIdentifier', False, b'hash', subject=ca), ])
  186. ca.sign(key, 'sha1')
  187. return key, ca
  188. @staticmethod
  189. def dump_ca():
  190. key, ca = CertUtil.create_ca()
  191. with open(CertUtil.ca_keyfile, 'wb') as fp:
  192. fp.write(OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, ca))
  193. fp.write(OpenSSL.crypto.dump_privatekey(OpenSSL.crypto.FILETYPE_PEM, key))
  194. @staticmethod
  195. def _get_cert(commonname, sans=()):
  196. content = """
  197. -----BEGIN CERTIFICATE-----
  198. MIIDUjCCAjoCAQAwDQYJKoZIhvcNAQEFBQAwbzEVMBMGA1UECxMMR29BZ2VudCBS
  199. b290MRAwDgYDVQQKEwdHb0FnZW50MRMwEQYDVQQDEwpHb0FnZW50IENBMREwDwYD
  200. VQQIEwhJbnRlcm5ldDELMAkGA1UEBhMCQ04xDzANBgNVBAcTBkNlcm5ldDAeFw0x
  201. MTA0MjAxNzM3MzVaFw0zMTA0MjAxNzM3MzVaMG8xFTATBgNVBAsTDEdvQWdlbnQg
  202. Um9vdDEQMA4GA1UEChMHR29BZ2VudDETMBEGA1UEAxMKR29BZ2VudCBDQTERMA8G
  203. A1UECBMISW50ZXJuZXQxCzAJBgNVBAYTAkNOMQ8wDQYDVQQHEwZDZXJuZXQwggEi
  204. MA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQC0jV3yx3yGAHlQqzm4fbVascvT
  205. nyCdtParWBnQn5A3U9pJjI47SCo8j7FfeoYSL0mHbJ0mjafTnw+/ewb09AQIkdEl
  206. n6smojl7NOKs1Yhh0yldB6kQWiBPr/XKMBskmvcyjJEqkU6hwtibASaAZt+q5clT
  207. BJ2XRaeAaMDeDbYDchFa7MTNhoQMdQFu1UhqkJxtuVMBEs1/qPbx5O9pqy1RgAeK
  208. WvxyCzVRi2hHaTns+weZBJ6N71afyvr1etGqqtWVpjpobk1ZFBYk4xpznCbm4iqP
  209. Ar9nqdGDw1IJIdX0DyMJIJrpwOf94pAK9v6zG0jnsbMqromL18kEMXZgYSMlAgMB
  210. AAEwDQYJKoZIhvcNAQEFBQADggEBAASiRZFCcgQ8VsncB8wKG+bmN9UZhXLJYRGp
  211. m3KIUy/zG6mMWG/3TgkPn8ivNAkrk+1ul5SrRvot/Q7XWpb0/yKX0faX/512JF2G
  212. 220gopqo4amj+g7SBKxzW8VhLQF6dm99eUd27JbAzi5VKXR0dMFECk2rFlA5gAR5
  213. zzFijaXHuObMtd2S292wji79JWocA0z6WVM5Qokw4hRTsXWfXL0BJTL3i/xRrEzW
  214. sdecYFpNhaEKldjegazoqAqiAMJj7PDU1AqdprNsq+3/tAmCvn0URkas4QhkvtqS
  215. FO6OGm/PZe5GbkBpAKdfLYFfEMO17SAGHHqAsIKAFfuHYONRGSM=
  216. -----END CERTIFICATE-----
  217. -----BEGIN RSA PRIVATE KEY-----
  218. MIIEowIBAAKCAQEAtI1d8sd8hgB5UKs5uH21WrHL058gnbT2q1gZ0J+QN1PaSYyO
  219. O0gqPI+xX3qGEi9Jh2ydJo2n058Pv3sG9PQECJHRJZ+rJqI5ezTirNWIYdMpXQep
  220. EFogT6/1yjAbJJr3MoyRKpFOocLYmwEmgGbfquXJUwSdl0WngGjA3g22A3IRWuzE
  221. zYaEDHUBbtVIapCcbblTARLNf6j28eTvaastUYAHilr8cgs1UYtoR2k57PsHmQSe
  222. je9Wn8r69XrRqqrVlaY6aG5NWRQWJOMac5wm5uIqjwK/Z6nRg8NSCSHV9A8jCSCa
  223. 6cDn/eKQCvb+sxtI57GzKq6Ji9fJBDF2YGEjJQIDAQABAoIBACB3n2JN/xV1tlsM
  224. P1fuuxLxD+8hGVNivEy5jgLW/q8EVCePr+/3HSlAyauas8tHV5iTrnrFVF2Yp9NO
  225. A0U/MA5+cjaqzLMozt9Z9j0QNPMqbrC89Ojs3AyYXsGZ/veJKlSbtGsMMDCkgiD1
  226. hv/l/+iSY66bEN+n9eQAclY77vQVXLSoCMReVfbdUxU9Q1MywODGf5Kng84gTyT/
  227. zd+xEfFHz8zbCDyw3Hd3hGJ2FxN+yFz1uI29ORb3/R7N9dZgsWf2fsfiRVPGuhAH
  228. RNlDockImB+BKeidx14sMim5p7s8heVYkBVW3SIOEReqz59b8x4QVhhZrzYWSHNq
  229. Gi0pLiECgYEA26v6b+rsxT//PznJSEhLyrg1Jo6XeWmFlwZY0KoipH6sxX/YPrDZ
  230. bOPN8KvAHtRltRLFs3L2iRaO2jltjxHGVF4FSYrf5KSExuj6/ABHxWM0YtezfDwR
  231. hU1ORg5QwVegMoOgsphS8ts2xn6T6wIwpBgtFPY84A52IBVn5CHuQtkCgYEA0mk5
  232. EpnZfmMT5ldcZ7JlZrxfWKvDHIcuA0neIBsd4oIcEfRhDC3TolH6pB4z4SCqyYw3
  233. t5HMiTx8yz074mycTcOcXO1Cs49kMZwbzKziRXpUdCW4EIo0DG+6LqwetPgYzozg
  234. FeTiGQBHqjrzjBLZ3RfozICbo7dvYHkVLK92my0CgYBWNBjlDnW3ujN6Jj0cxnIn
  235. rT3+UXqTxJsN9wmnaPyLPMKkBlVf1JqeJo9MYLnV31fCRQmcMAMbLOUGMf8SY9FG
  236. jlbY00ylNwJ75DWJ6ro/dXy7RRZELHZbr0iGKVv7Y12UNR88tpXmg6vtHQMC+CsK
  237. Wgpm7XJaIpKsaHoKhl4vkQKBgBBBTsZwGkxYTSZDY4EjWBAax2brRhSDIPviDgX+
  238. 8k0YbiC493Jga/QjTzC0oJ9ozajqazeETP/hK2bsIR858s1TKlZHghqrHjty6vbh
  239. +E0TyUh7zX+BncnEK+cFJw4mCIyUd49ZcloqGl89VKlin3AkM7jwypVYS4Nxd0BP
  240. geM1AoGBALOWNmYm9d4gRhUv14oJRiA+e+4evswiWvVdnS6UJ4tst0NlEKWahtpR
  241. kdAjav8WV1n6IbkJC2L743Ozjb63z5w6p5O7OtTyYUWbLt1hvNkHlkNP8AjRQP8E
  242. +N2jjrMAdbEwahPNAX9QlzHpF62AfEGQ3oODUm06TGTq+yAPSyYm
  243. -----END RSA PRIVATE KEY-----
  244. """
  245. key = OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, content)
  246. ca = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, content)
  247. pkey = OpenSSL.crypto.PKey()
  248. pkey.generate_key(OpenSSL.crypto.TYPE_RSA, 2048)
  249. req = OpenSSL.crypto.X509Req()
  250. subj = req.get_subject()
  251. subj.countryName = 'CN'
  252. subj.stateOrProvinceName = 'Internet'
  253. subj.localityName = 'Cernet'
  254. subj.organizationalUnitName = '%s Branch' % CertUtil.ca_vendor
  255. if commonname[0] == '.':
  256. subj.commonName = '*' + commonname
  257. subj.organizationName = '*' + commonname
  258. sans = ['*'+commonname] + [x for x in sans if x != '*'+commonname]
  259. else:
  260. subj.commonName = commonname
  261. subj.organizationName = commonname
  262. sans = [commonname] + [x for x in sans if x != commonname]
  263. #req.add_extensions([OpenSSL.crypto.X509Extension(b'subjectAltName', True, ', '.join('DNS: %s' % x for x in sans)).encode()])
  264. req.set_pubkey(pkey)
  265. req.sign(pkey, 'sha1')
  266. cert = OpenSSL.crypto.X509()
  267. cert.set_version(2)
  268. try:
  269. cert.set_serial_number(int(hashlib.md5(commonname.encode('utf-8')).hexdigest(), 16))
  270. except OpenSSL.SSL.Error:
  271. cert.set_serial_number(int(time.time()*1000))
  272. cert.gmtime_adj_notBefore(0)
  273. cert.gmtime_adj_notAfter(60 * 60 * 24 * 3652)
  274. cert.set_issuer(ca.get_subject())
  275. cert.set_subject(req.get_subject())
  276. cert.set_pubkey(req.get_pubkey())
  277. if commonname[0] == '.':
  278. sans = ['*'+commonname] + [s for s in sans if s != '*'+commonname]
  279. else:
  280. sans = [commonname] + [s for s in sans if s != commonname]
  281. #cert.add_extensions([OpenSSL.crypto.X509Extension(b'subjectAltName', True, ', '.join('DNS: %s' % x for x in sans))])
  282. cert.sign(key, 'sha1')
  283. certfile = os.path.join(CertUtil.ca_certdir, commonname + '.crt')
  284. with open(certfile, 'wb') as fp:
  285. fp.write(OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, cert))
  286. fp.write(OpenSSL.crypto.dump_privatekey(OpenSSL.crypto.FILETYPE_PEM, pkey))
  287. return certfile
  288. @staticmethod
  289. def get_cert(commonname, sans=()):
  290. if commonname.count('.') >= 2 and len(commonname.split('.')[-2]) > 4:
  291. commonname = '.'+commonname.partition('.')[-1]
  292. certfile = os.path.join(CertUtil.ca_certdir, commonname + '.crt')
  293. if os.path.exists(certfile):
  294. return certfile
  295. elif OpenSSL is None:
  296. return CertUtil.ca_keyfile
  297. else:
  298. with CertUtil.ca_lock:
  299. if os.path.exists(certfile):
  300. return certfile
  301. return CertUtil._get_cert(commonname, sans)
  302. @staticmethod
  303. def import_ca(certfile):
  304. commonname = os.path.splitext(os.path.basename(certfile))[0]
  305. if OpenSSL:
  306. try:
  307. with open(certfile, 'rb') as fp:
  308. x509 = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, fp.read())
  309. commonname = next(v.decode() for k, v in x509.get_subject().get_components() if k == b'O')
  310. except Exception as e:
  311. logging.error('load_certificate(certfile=%r) failed:%s', certfile, e)
  312. if sys.platform.startswith('win'):
  313. import ctypes
  314. with open(certfile, 'rb') as fp:
  315. certdata = fp.read()
  316. if certdata.startswith(b'-----'):
  317. begin = b'-----BEGIN CERTIFICATE-----'
  318. end = b'-----END CERTIFICATE-----'
  319. certdata = base64.b64decode(b''.join(certdata[certdata.find(begin)+len(begin):certdata.find(end)].strip().splitlines()))
  320. crypt32 = ctypes.WinDLL(b'crypt32.dll'.decode())
  321. store_handle = crypt32.CertOpenStore(10, 0, 0, 0x4000 | 0x20000, b'ROOT'.decode())
  322. if not store_handle:
  323. return -1
  324. if crypt32.CertFindCertificateInStore(store_handle, 0x1, 0, 0x80007, CertUtil.ca_vendor.decode(), None):
  325. return 0
  326. ret = crypt32.CertAddEncodedCertificateToStore(store_handle, 0x1, certdata, len(certdata), 4, None)
  327. crypt32.CertCloseStore(store_handle, 0)
  328. del crypt32
  329. return 0 if ret else -1
  330. elif sys.platform == 'darwin':
  331. return os.system(('security find-certificate -a -c "%s" | grep "%s" >/dev/null || security add-trusted-cert -d -r trustRoot -k "/Library/Keychains/System.keychain" "%s"' % (commonname, commonname, certfile.decode('utf-8'))).encode('utf-8'))
  332. elif sys.platform.startswith('linux'):
  333. import platform
  334. platform_distname = platform.dist()[0]
  335. if platform_distname == 'Ubuntu':
  336. pemfile = "/etc/ssl/certs/%s.pem" % commonname
  337. new_certfile = "/usr/local/share/ca-certificates/%s.crt" % commonname
  338. if not os.path.exists(pemfile):
  339. return os.system('cp "%s" "%s" && update-ca-certificates' % (certfile, new_certfile))
  340. elif any(os.path.isfile('%s/certutil' % x) for x in os.environ['PATH'].split(os.pathsep)):
  341. return os.system('certutil -L -d sql:$HOME/.pki/nssdb | grep "%s" || certutil -d sql:$HOME/.pki/nssdb -A -t "C,," -n "%s" -i "%s"' % (commonname, commonname, certfile))
  342. else:
  343. logging.warning('please install *libnss3-tools* package to import GoAgent root ca')
  344. return 0
  345. @staticmethod
  346. def check_ca():
  347. #Check CA exists
  348. capath = os.path.join(os.path.dirname(os.path.abspath(__file__)), CertUtil.ca_keyfile)
  349. certdir = os.path.join(os.path.dirname(os.path.abspath(__file__)), CertUtil.ca_certdir)
  350. if not os.path.exists(capath):
  351. if not OpenSSL:
  352. logging.critical('CA.key is not exist and OpenSSL is disabled, ABORT!')
  353. sys.exit(-1)
  354. if os.path.exists(certdir):
  355. if os.path.isdir(certdir):
  356. any(os.remove(x) for x in glob.glob(certdir+'/*.crt')+glob.glob(certdir+'/.*.crt'))
  357. else:
  358. os.remove(certdir)
  359. os.mkdir(certdir)
  360. CertUtil.dump_ca()
  361. if glob.glob('%s/*.key' % CertUtil.ca_certdir):
  362. for filename in glob.glob('%s/*.key' % CertUtil.ca_certdir):
  363. try:
  364. os.remove(filename)
  365. os.remove(os.path.splitext(filename)[0]+'.crt')
  366. except EnvironmentError:
  367. pass
  368. #Check CA imported
  369. if CertUtil.import_ca(capath) != 0:
  370. logging.warning('install root certificate failed, Please run as administrator/root/sudo')
  371. #Check Certs Dir
  372. if not os.path.exists(certdir):
  373. os.makedirs(certdir)
  374. class SSLConnection(object):
  375. """OpenSSL Connection Wapper"""
  376. def __init__(self, context, sock):
  377. self._context = context
  378. self._sock = sock
  379. self._connection = OpenSSL.SSL.Connection(context, sock)
  380. self._makefile_refs = 0
  381. def __getattr__(self, attr):
  382. if attr not in ('_context', '_sock', '_connection', '_makefile_refs'):
  383. return getattr(self._connection, attr)
  384. def __wait_sock_io(self, sock, io_func, *args, **kwargs):
  385. timeout = self._sock.gettimeout() or 0.1
  386. fd = self._sock.fileno()
  387. while True:
  388. try:
  389. return io_func(*args, **kwargs)
  390. except (OpenSSL.SSL.WantReadError, OpenSSL.SSL.WantX509LookupError):
  391. sys.exc_clear()
  392. _, _, errors = select.select([fd], [], [fd], timeout)
  393. if errors:
  394. break
  395. except OpenSSL.SSL.WantWriteError:
  396. sys.exc_clear()
  397. _, _, errors = select.select([], [fd], [fd], timeout)
  398. if errors:
  399. break
  400. def accept(self):
  401. sock, addr = self._sock.accept()
  402. client = OpenSSL.SSL.Connection(sock._context, sock)
  403. return client, addr
  404. def do_handshake(self):
  405. return self.__wait_sock_io(self._sock, self._connection.do_handshake)
  406. def connect(self, *args, **kwargs):
  407. return self.__wait_sock_io(self._sock, self._connection.connect, *args, **kwargs)
  408. def send(self, data, flags=0):
  409. try:
  410. return self.__wait_sock_io(self._sock, self._connection.send, data, flags)
  411. except OpenSSL.SSL.SysCallError as e:
  412. if e[0] == -1 and not data:
  413. # errors when writing empty strings are expected and can be ignored
  414. return 0
  415. raise
  416. def recv(self, bufsiz, flags=0):
  417. pending = self._connection.pending()
  418. if pending:
  419. return self._connection.recv(min(pending, bufsiz))
  420. try:
  421. return self.__wait_sock_io(self._sock, self._connection.recv, bufsiz, flags)
  422. except OpenSSL.SSL.ZeroReturnError:
  423. return ''
  424. def read(self, bufsiz, flags=0):
  425. return self.recv(bufsiz, flags)
  426. def write(self, buf, flags=0):
  427. return self.sendall(buf, flags)
  428. def close(self):
  429. if self._makefile_refs < 1:
  430. self._connection = None
  431. if self._sock:
  432. socket.socket.close(self._sock)
  433. else:
  434. self._makefile_refs -= 1
  435. def makefile(self, mode='r', bufsize=-1):
  436. self._makefile_refs += 1
  437. return socket._fileobject(self, mode, bufsize, close=True)
  438. def parse_hostport(host, default_port=80):
  439. m = re.match(r'(.+)[#](\d+)$', host)
  440. if m:
  441. return m.group(1).strip('[]'), int(m.group(2))
  442. else:
  443. return host.strip('[]'), default_port
def dnslib_resolve_over_udp(query, dnsservers, timeout, **kwargs):
    """
    Resolve *query* by sending it over UDP to every server in *dnsservers*
    and returning the first acceptable reply as a dnslib.DNSRecord.

    query      -- host name string or dnslib.DNSRecord
    dnsservers -- server addresses; entries containing ':' are sent over IPv6
    timeout    -- seconds added to the current time to form the reply deadline
    kwargs     -- 'blacklist': addresses whose presence makes a reply be ignored;
                  'turstservers' (spelled as in the code): servers whose
                  error replies without addresses are returned as-is

    Raises TypeError for an unsupported query type and socket.gaierror when
    no acceptable reply was received.

    http://gfwrev.blogspot.com/2009/11/gfwdns.html
    http://zh.wikipedia.org/wiki/%E5%9F%9F%E5%90%8D%E6%9C%8D%E5%8A%A1%E5%99%A8%E7%BC%93%E5%AD%98%E6%B1%A1%E6%9F%93
    http://support.microsoft.com/kb/241352
    """
    if not isinstance(query, (basestring, dnslib.DNSRecord)):
        raise TypeError('query argument requires string/DNSRecord')
    blacklist = kwargs.get('blacklist', ())
    turstservers = kwargs.get('turstservers', ())
    dns_v4_servers = [x for x in dnsservers if ':' not in x]
    dns_v6_servers = [x for x in dnsservers if ':' in x]
    sock_v4 = sock_v6 = None
    socks = []
    # One UDP socket per address family that is actually needed.
    if dns_v4_servers:
        sock_v4 = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        socks.append(sock_v4)
    if dns_v6_servers:
        sock_v6 = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM)
        socks.append(sock_v6)
    # NOTE(review): the deadline is computed once, outside the retry loop, so
    # after the first attempt has used it up later attempts only re-send the
    # query and skip the receive loop -- confirm this is intended.
    timeout_at = time.time() + timeout
    try:
        for _ in xrange(4):
            try:
                for dnsserver in dns_v4_servers:
                    # A string query is converted once; afterwards `query` is a
                    # DNSRecord, so the AAAA conversion below only happens
                    # when there are no IPv4 servers.
                    if isinstance(query, basestring):
                        query = dnslib.DNSRecord(q=dnslib.DNSQuestion(query))
                    query_data = query.pack()
                    sock_v4.sendto(query_data, parse_hostport(dnsserver, 53))
                for dnsserver in dns_v6_servers:
                    if isinstance(query, basestring):
                        query = dnslib.DNSRecord(q=dnslib.DNSQuestion(query, qtype=dnslib.QTYPE.AAAA))
                    query_data = query.pack()
                    sock_v6.sendto(query_data, parse_hostport(dnsserver, 53))
                while time.time() < timeout_at:
                    # Poll in 0.1 s slices so the deadline is re-checked regularly.
                    ins, _, _ = select.select(socks, [], [], 0.1)
                    for sock in ins:
                        reply_data, reply_address = sock.recvfrom(512)
                        reply_server = reply_address[0]
                        record = dnslib.DNSRecord.parse(reply_data)
                        # Keep record data of types 1 (A), 28 (AAAA) and 255 (ANY).
                        iplist = [str(x.rdata) for x in record.rr if x.rtype in (1, 28, 255)]
                        if any(x in blacklist for x in iplist):
                            # Reply contains a blacklisted address: ignore it and keep waiting.
                            logging.debug('query=%r dnsservers=%r record bad iplist=%r', query, dnsservers, iplist)
                        elif record.header.rcode and not iplist and reply_server in turstservers:
                            # An error reply without addresses is accepted only from a trusted server.
                            logging.info('query=%r trust reply_server=%r record rcode=%s', query, reply_server, record.header.rcode)
                            return record
                        elif iplist:
                            logging.debug('query=%r reply_server=%r record iplist=%s', query, reply_server, iplist)
                            return record
                        else:
                            logging.debug('query=%r reply_server=%r record null iplist=%s', query, reply_server, iplist)
                            continue
            except socket.error as e:
                # A socket failure ends this attempt; the outer loop tries again.
                logging.warning('handle dns query=%s socket: %r', query, e)
        raise socket.gaierror(11004, 'getaddrinfo %r from %r failed' % (query, dnsservers))
    finally:
        # Sockets are closed on every exit path (return or raise).
        for sock in socks:
            sock.close()
  502. def dnslib_resolve_over_tcp(query, dnsservers, timeout, **kwargs):
  503. """dns query over tcp"""
  504. if not isinstance(query, (basestring, dnslib.DNSRecord)):
  505. raise TypeError('query argument requires string/DNSRecord')
  506. blacklist = kwargs.get('blacklist', ())
  507. def do_resolve(query, dnsserver, timeout, queobj):
  508. if isinstance(query, basestring):
  509. qtype = dnslib.QTYPE.AAAA if ':' in dnsserver else dnslib.QTYPE.A
  510. query = dnslib.DNSRecord(q=dnslib.DNSQuestion(query, qtype=qtype))
  511. query_data = query.pack()
  512. sock_family = socket.AF_INET6 if ':' in dnsserver else socket.AF_INET
  513. sock = socket.socket(sock_family)
  514. rfile = None
  515. try:
  516. sock.settimeout(timeout or None)
  517. sock.connect(parse_hostport(dnsserver, 53))
  518. sock.send(struct.pack('>h', len(query_data)) + query_data)
  519. rfile = sock.makefile('r', 1024)
  520. reply_data_length = rfile.read(2)
  521. if len(reply_data_length) < 2:
  522. raise socket.gaierror(11004, 'getaddrinfo %r from %r failed' % (query, dnsserver))
  523. reply_data = rfile.read(struct.unpack('>h', reply_data_length)[0])
  524. record = dnslib.DNSRecord.parse(reply_data)
  525. iplist = [str(x.rdata) for x in record.rr if x.rtype in (1, 28, 255)]
  526. if any(x in blacklist for x in iplist):
  527. logging.debug('query=%r dnsserver=%r record bad iplist=%r', query, dnsserver, iplist)
  528. raise socket.gaierror(11004, 'getaddrinfo %r from %r failed' % (query, dnsserver))
  529. else:
  530. logging.debug('query=%r dnsserver=%r record iplist=%s', query, dnsserver, iplist)
  531. queobj.put(record)
  532. except socket.error as e:
  533. logging.debug('query=%r dnsserver=%r failed %r', query, dnsserver, e)
  534. queobj.put(e)
  535. finally:
  536. if rfile:
  537. rfile.close()
  538. sock.close()
  539. queobj = Queue.Queue()
  540. for dnsserver in dnsservers:
  541. thread.start_new_thread(do_resolve, (query, dnsserver, timeout, queobj))
  542. for i in range(len(dnsservers)):
  543. try:
  544. result = queobj.get(timeout)
  545. except Queue.Empty:
  546. raise socket.gaierror(11004, 'getaddrinfo %r from %r failed' % (query, dnsservers))
  547. if result and not isinstance(result, Exception):
  548. return result
  549. elif i == len(dnsservers) - 1:
  550. logging.warning('dnslib_resolve_over_tcp %r with %s return %r', query, dnsservers, result)
  551. raise socket.gaierror(11004, 'getaddrinfo %r from %r failed' % (query, dnsservers))
  552. def dnslib_record2iplist(record):
  553. """convert dnslib.DNSRecord to iplist"""
  554. assert isinstance(record, dnslib.DNSRecord)
  555. iplist = [x for x in (str(r.rdata) for r in record.rr) if re.match(r'^\d+\.\d+\.\d+\.\d+$', x) or ':' in x]
  556. return iplist
  557. def get_dnsserver_list():
  558. if os.name == 'nt':
  559. import ctypes, ctypes.wintypes, struct, socket
  560. DNS_CONFIG_DNS_SERVER_LIST = 6
  561. buf = ctypes.create_string_buffer(2048)
  562. ctypes.windll.dnsapi.DnsQueryConfig(DNS_CONFIG_DNS_SERVER_LIST, 0, None, None, ctypes.byref(buf), ctypes.byref(ctypes.wintypes.DWORD(len(buf))))
  563. ipcount = struct.unpack('I', buf[0:4])[0]
  564. iplist = [socket.inet_ntoa(buf[i:i+4]) for i in xrange(4, ipcount*4+4, 4)]
  565. return iplist
  566. elif os.path.isfile('/etc/resolv.conf'):
  567. with open('/etc/resolv.conf', 'rb') as fp:
  568. return re.findall(r'(?m)^nameserver\s+(\S+)', fp.read())
  569. else:
  570. logging.warning("get_dnsserver_list failed: unsupport platform '%s-%s'", sys.platform, os.name)
  571. return []
  572. def spawn_later(seconds, target, *args, **kwargs):
  573. def wrap(*args, **kwargs):
  574. __import__('time').sleep(seconds)
  575. return target(*args, **kwargs)
  576. return __import__('thread').start_new_thread(wrap, args, kwargs)
  577. def is_clienthello(data):
  578. if len(data) < 20:
  579. return False
  580. if data.startswith('\x16\x03'):
  581. # TLSv12/TLSv11/TLSv1/SSLv3
  582. length, = struct.unpack('>h', data[3:5])
  583. return len(data) == 5 + length
  584. elif data[0] == '\x80' and data[2:4] == '\x01\x03':
  585. # SSLv23
  586. return len(data) == 2 + ord(data[1])
  587. else:
  588. return False
  589. def extract_sni_name(packet):
  590. if packet.startswith('\x16\x03'):
  591. stream = io.BytesIO(packet)
  592. stream.read(0x2b)
  593. session_id_length = ord(stream.read(1))
  594. stream.read(session_id_length)
  595. cipher_suites_length, = struct.unpack('>h', stream.read(2))
  596. stream.read(cipher_suites_length+2)
  597. extensions_length, = struct.unpack('>h', stream.read(2))
  598. extensions = {}
  599. while True:
  600. data = stream.read(2)
  601. if not data:
  602. break
  603. etype, = struct.unpack('>h', data)
  604. elen, = struct.unpack('>h', stream.read(2))
  605. edata = stream.read(elen)
  606. if etype == 0:
  607. server_name = edata[5:]
  608. return server_name
  609. class URLFetch(object):
  610. """URLFetch for gae/php fetchservers"""
  611. skip_headers = frozenset(['Vary', 'Via', 'X-Forwarded-For', 'Proxy-Authorization', 'Proxy-Connection', 'Upgrade', 'X-Chrome-Variations', 'Connection', 'Cache-Control'])
  612. def __init__(self, handler, fetchserver):
  613. assert isinstance(fetchserver, basestring) and callable(create_http_request)
  614. self.handler = handler
  615. self.fetchserver = fetchserver
  616. self.create_http_request = handler.create_http_request
  617. def fetch(self, method, url, headers, body, timeout, **kwargs):
  618. return self.__google_fetch(method, url, headers, body, timeout, **kwargs)
  619. def __google_fetch(self, method, url, headers, body, timeout, **kwargs):
  620. url = url.replace('http://', 'https://', 1)
  621. url = re.sub(r'^(\w+://)', r'\g<1>2-ps.googleusercontent.com/h/', url)
  622. #print url
  623. proxies = {'http':'%s:%s'%('127.0.0.1', common.LISTEN_PORT),'https':'%s:%s'%('127.0.0.1', common.LISTEN_PORT)}
  624. opener = urllib2.build_opener(urllib2.ProxyHandler(proxies))
  625. response = opener.open(url)
  626. #print response
  627. return response
  628. class BaseProxyHandlerFilter(object):
  629. """base proxy handler filter"""
  630. def filter(self, handler):
  631. raise NotImplementedError
  632. class SimpleProxyHandlerFilter(BaseProxyHandlerFilter):
  633. """simple proxy handler filter"""
  634. def filter(self, handler):
  635. if handler.command == 'CONNECT':
  636. return [handler.FORWARD, handler.host, handler.port, handler.connect_timeout]
  637. else:
  638. return [handler.DIRECT, {}]
  639. class SimpleProxyHandler(BaseHTTPServer.BaseHTTPRequestHandler):
  640. """SimpleProxyHandler for GoAgent 3.x"""
  641. protocol_version = 'HTTP/1.1'
  642. ssl_version = ssl.PROTOCOL_SSLv23
  643. disable_transport_ssl = True
  644. scheme = 'http'
  645. skip_headers = frozenset(['Vary', 'Via', 'X-Forwarded-For', 'Proxy-Authorization', 'Proxy-Connection', 'Upgrade', 'X-Chrome-Variations', 'Connection', 'Cache-Control'])
  646. bufsize = 256 * 1024
  647. max_timeout = 4
  648. connect_timeout = 4
  649. first_run_lock = threading.Lock()
  650. handler_filters = [SimpleProxyHandlerFilter()]
  651. sticky_filter = None
  652. def finish(self):
  653. """make python2 BaseHTTPRequestHandler happy"""
  654. try:
  655. BaseHTTPServer.BaseHTTPRequestHandler.finish(self)
  656. except NetWorkIOError as e:
  657. if e[0] not in (errno.ECONNABORTED, errno.ECONNRESET, errno.EPIPE):
  658. raise
  659. def address_string(self):
  660. return '%s:%s' % self.client_address[:2]
  661. def send_response(self, code, message=None):
  662. if message is None:
  663. if code in self.responses:
  664. message = self.responses[code][0]
  665. else:
  666. message = ''
  667. if self.request_version != 'HTTP/0.9':
  668. self.wfile.write('%s %d %s\r\n' % (self.protocol_version, code, message))
  669. def send_header(self, keyword, value):
  670. """Send a MIME header."""
  671. base_send_header = BaseHTTPServer.BaseHTTPRequestHandler.send_header
  672. keyword = keyword.title()
  673. if keyword == 'Set-Cookie':
  674. for cookie in re.split(r', (?=[^ =]+(?:=|$))', value):
  675. base_send_header(self, keyword, cookie)
  676. elif keyword == 'Content-Disposition' and '"' not in value:
  677. value = re.sub(r'filename=([^"\']+)', 'filename="\\1"', value)
  678. base_send_header(self, keyword, value)
  679. else:
  680. base_send_header(self, keyword, value)
  681. def setup(self):
  682. if isinstance(self.__class__.first_run, collections.Callable):
  683. try:
  684. with self.__class__.first_run_lock:
  685. if isinstance(self.__class__.first_run, collections.Callable):
  686. self.first_run()
  687. self.__class__.first_run = None
  688. except StandardError as e:
  689. logging.exception('%s.first_run() return %r', self.__class__, e)
  690. self.__class__.setup = BaseHTTPServer.BaseHTTPRequestHandler.setup
  691. self.__class__.do_CONNECT = self.__class__.do_METHOD
  692. self.__class__.do_GET = self.__class__.do_METHOD
  693. self.__class__.do_PUT = self.__class__.do_METHOD
  694. self.__class__.do_POST = self.__class__.do_METHOD
  695. self.__class__.do_HEAD = self.__class__.do_METHOD
  696. self.__class__.do_DELETE = self.__class__.do_METHOD
  697. self.__class__.do_OPTIONS = self.__class__.do_METHOD
  698. self.setup()
  699. def handle_one_request(self):
  700. if not self.disable_transport_ssl and self.scheme == 'http':
  701. leadbyte = self.connection.recv(1, socket.MSG_PEEK)
  702. if leadbyte in ('\x80', '\x16'):
  703. server_name = ''
  704. if leadbyte == '\x16':
  705. for _ in xrange(2):
  706. leaddata = self.connection.recv(1024, socket.MSG_PEEK)
  707. if is_clienthello(leaddata):
  708. try:
  709. server_name = extract_sni_name(leaddata)
  710. finally:
  711. break
  712. try:
  713. certfile = CertUtil.get_cert(server_name or 'www.google.com')
  714. ssl_sock = ssl.wrap_socket(self.connection, ssl_version=self.ssl_version, keyfile=certfile, certfile=certfile, server_side=True)
  715. except StandardError as e:
  716. if e.args[0] not in (errno.ECONNABORTED, errno.ECONNRESET):
  717. logging.exception('ssl.wrap_socket(self.connection=%r) failed: %s', self.connection, e)
  718. return
  719. self.connection = ssl_sock
  720. self.rfile = self.connection.makefile('rb', self.bufsize)
  721. self.wfile = self.connection.makefile('wb', 0)
  722. self.scheme = 'https'
  723. return BaseHTTPServer.BaseHTTPRequestHandler.handle_one_request(self)
  724. def first_run(self):
  725. pass
  726. def gethostbyname2(self, hostname):
  727. return socket.gethostbyname_ex(hostname)[-1]
  728. def create_tcp_connection(self, hostname, port, timeout, **kwargs):
  729. return socket.create_connection((hostname, port), timeout)
  730. def create_ssl_connection(self, hostname, port, timeout, **kwargs):
  731. sock = self.create_tcp_connection(hostname, port, timeout, **kwargs)
  732. ssl_sock = ssl.wrap_socket(sock, ssl_version=self.ssl_version)
  733. return ssl_sock
  734. def create_http_request(self, method, url, headers, body, timeout, **kwargs):
  735. scheme, netloc, path, query, _ = urlparse.urlsplit(url)
  736. if netloc.rfind(':') <= netloc.rfind(']'):
  737. # no port number
  738. host = netloc
  739. port = 443 if scheme == 'https' else 80
  740. else:
  741. host, _, port = netloc.rpartition(':')
  742. port = int(port)
  743. if query:
  744. path += '?' + query
  745. if 'Host' not in headers:
  746. headers['Host'] = host
  747. if body and 'Content-Length' not in headers:
  748. headers['Content-Length'] = str(len(body))
  749. ConnectionType = httplib.HTTPSConnection if scheme == 'https' else httplib.HTTPConnection
  750. connection = ConnectionType(netloc, timeout=timeout)
  751. connection.request(method, path, body=body, headers=headers)
  752. response = connection.getresponse()
  753. return response
  754. def create_http_request_withserver(self, fetchserver, method, url, headers, body, timeout, **kwargs):
  755. return URLFetch(self, fetchserver).fetch(method, url, headers, body, timeout, **kwargs)
  756. def handle_urlfetch_error(self, fetchserver, response):
  757. pass
  758. def handle_urlfetch_response_close(self, fetchserver, response):
  759. pass
  760. def parse_header(self):
  761. if self.command == 'CONNECT':
  762. netloc = self.path
  763. elif self.path[0] == '/':
  764. netloc = self.headers.get('Host', 'localhost')
  765. self.path = '%s://%s%s' % (self.scheme, netloc, self.path)
  766. else:
  767. netloc = urlparse.urlsplit(self.path).netloc
  768. m = re.match(r'^(.+):(\d+)$', netloc)
  769. if m:
  770. self.host = m.group(1).strip('[]')
  771. self.port = int(m.group(2))
  772. else:
  773. self.host = netloc
  774. self.port = 443 if self.scheme == 'https' else 80
  775. def forward_socket(self, local, remote, timeout):
  776. try:
  777. tick = 1
  778. bufsize = self.bufsize
  779. timecount = timeout
  780. while 1:
  781. timecount -= tick
  782. if timecount <= 0:
  783. break
  784. (ins, _, errors) = select.select([local, remote], [], [local, remote], tick)
  785. if errors:
  786. break
  787. for sock in ins:
  788. data = sock.recv(bufsize)
  789. if not data:
  790. break
  791. if sock is remote:
  792. local.sendall(data)
  793. timecount = timeout
  794. else:
  795. remote.sendall(data)
  796. timecount = timeout
  797. except socket.timeout:
  798. pass
  799. except NetWorkIOError as e:
  800. if e.args[0] not in (errno.ECONNABORTED, errno.ECONNRESET, errno.ENOTCONN, errno.EPIPE):
  801. raise
  802. if e.args[0] in (errno.EBADF,):
  803. return
  804. finally:
  805. for sock in (remote, local):
  806. try:
  807. sock.close()
  808. except StandardError:
  809. pass
  810. def MOCK(self, status, headers, content):
  811. """mock response"""
  812. logging.info('%s "MOCK %s %s %s" %d %d', self.address_string(), self.command, self.path, self.protocol_version, status, len(content))
  813. headers = dict((k.title(), v) for k, v in headers.items())
  814. if 'Transfer-Encoding' in headers:
  815. del headers['Transfer-Encoding']
  816. if 'Content-Length' not in headers:
  817. headers['Content-Length'] = len(content)
  818. if 'Connection' not in headers:
  819. headers['Connection'] = 'close'
  820. self.send_response(status)
  821. for key, value in headers.items():
  822. self.send_header(key, value)
  823. self.end_headers()
  824. self.wfile.write(content)
  825. def STRIP(self, do_ssl_handshake=True, sticky_filter=None):
  826. """strip connect"""
  827. certfile = CertUtil.get_cert(self.host)
  828. logging.info('%s "STRIP %s %s:%d %s" - -', self.address_string(), self.command, self.host, self.port, self.protocol_version)
  829. self.send_response(200)
  830. self.end_headers()
  831. if do_ssl_handshake:
  832. try:
  833. ssl_sock = ssl.wrap_socket(self.connection, ssl_version=self.ssl_version, keyfile=certfile, certfile=certfile, server_side=True)
  834. except StandardError as e:
  835. if e.args[0] not in (errno.ECONNABORTED, errno.ECONNRESET):
  836. logging.exception('ssl.wrap_socket(self.connection=%r) failed: %s', self.connection, e)
  837. return
  838. self.connection = ssl_sock
  839. self.rfile = self.connection.makefile('rb', self.bufsize)
  840. self.wfile = self.connection.makefile('wb', 0)
  841. self.scheme = 'https'
  842. try:
  843. self.raw_requestline = self.rfile.readline(65537)
  844. if len(self.raw_requestline) > 65536:
  845. self.requestline = ''
  846. self.request_version = ''
  847. self.command = ''
  848. self.send_error(414)
  849. return
  850. if not self.raw_requestline:
  851. self.close_connection = 1
  852. return
  853. if not self.parse_request():
  854. return
  855. except NetWorkIOError as e:
  856. if e.args[0] not in (errno.ECONNABORTED, errno.ECONNRESET, errno.EPIPE):
  857. raise
  858. self.sticky_filter = sticky_filter
  859. try:
  860. self.do_METHOD()
  861. except NetWorkIOError as e:
  862. if e.args[0] not in (errno.ECONNABORTED, errno.ETIMEDOUT, errno.EPIPE):
  863. raise
  864. def FORWARD(self, hostname, port, timeout, kwargs={}):
  865. """forward socket"""
  866. do_ssl_handshake = kwargs.pop('do_ssl_handshake', False)
  867. local = self.connection
  868. remote = None
  869. self.send_response(200)
  870. self.end_headers()
  871. self.close_connection = 1
  872. data = local.recv(1024)
  873. if not data:
  874. local.close()
  875. return
  876. data_is_clienthello = is_clienthello(data)
  877. if data_is_clienthello:
  878. kwargs['client_hello'] = data
  879. max_retry = kwargs.get('max_retry', 5)
  880. for i in xrange(max_retry):
  881. try:
  882. if do_ssl_handshake:
  883. remote = self.create_ssl_connection(hostname, port, timeout, **kwargs)
  884. else:
  885. remote = self.create_tcp_connection(hostname, port, timeout, **kwargs)
  886. if not data_is_clienthello and remote and not isinstance(remote, Exception):
  887. remote.sendall(data)
  888. break
  889. except StandardError as e:
  890. logging.exception('%s "FWD %s %s:%d %s" %r', self.address_string(), self.command, hostname, port, self.protocol_version, e)
  891. if hasattr(remote, 'close'):
  892. remote.close()
  893. if i == max_retry - 1:
  894. raise
  895. logging.info('%s "FWD %s %s:%d %s" - -', self.address_string(), self.command, hostname, port, self.protocol_version)
  896. if hasattr(remote, 'fileno'):
  897. # reset timeout default to avoid long http upload failure, but it will delay timeout retry :(
  898. remote.settimeout(None)
  899. del kwargs
  900. data = data_is_clienthello and getattr(remote, 'data', None)
  901. if data:
  902. del remote.data
  903. local.sendall(data)
  904. self.forward_socket(local, remote, self.max_timeout)
  905. def DIRECT(self, kwargs):
  906. method = self.command
  907. if 'url' in kwargs:
  908. url = kwargs.pop('url')
  909. elif self.path.lower().startswith(('http://', 'https://', 'ftp://')):
  910. url = self.path
  911. else:
  912. url = 'http://%s%s' % (self.headers['Host'], self.path)
  913. headers = dict((k.title(), v) for k, v in self.headers.items())
  914. body = self.body
  915. response = None
  916. try:
  917. response = self.create_http_request(method, url, headers, body, timeout=self.connect_timeout, **kwargs)
  918. logging.info('%s "DIRECT %s %s %s" %s %s', self.address_string(), self.command, url, self.protocol_version, response.status, response.getheader('Content-Length', '-'))
  919. response_headers = dict((k.title(), v) for k, v in response.getheaders())
  920. self.send_response(response.status)
  921. for key, value in response.getheaders():
  922. self.send_header(key, value)
  923. self.end_headers()
  924. if self.command == 'HEAD' or response.status in (204, 304):
  925. response.close()
  926. return
  927. need_chunked = 'Transfer-Encoding' in response_headers
  928. while True:
  929. data = response.read(8192)
  930. if not data:
  931. if need_chunked:
  932. self.wfile.write('0\r\n\r\n')
  933. break
  934. if need_chunked:
  935. self.wfile.write('%x\r\n' % len(data))
  936. self.wfile.write(data)
  937. if need_chunked:
  938. self.wfile.write('\r\n')
  939. del data
  940. except (ssl.SSLError, socket.timeout, socket.error):
  941. if response:
  942. if response.fp and response.fp._sock:
  943. response.fp._sock.close()
  944. response.close()
  945. finally:
  946. if response:
  947. response.close()
  948. def URLFETCH(self, fetchservers, max_retry=5, kwargs={}):
  949. """urlfetch from fetchserver"""
  950. method = self.command
  951. if self.path[0] == '/':
  952. url = '%s://%s%s' % (self.scheme, self.headers['Host'], self.path)
  953. elif self.path.lower().startswith(('http://', 'https://', 'ftp://')):
  954. url = self.path
  955. else:
  956. raise ValueError('URLFETCH %r is not a valid url' % self.path)
  957. headers = dict((k.title(), v) for k, v in self.headers.items())
  958. body = self.body
  959. response = None
  960. errors = []
  961. fetchserver = ''
  962. for i in xrange(max_retry):
  963. try:
  964. response = self.create_http_request_withserver(fetchserver, method, url, headers, body, timeout=60, **kwargs)
  965. if response:
  966. break
  967. except StandardError as e:
  968. errors.append(e)
  969. logging.info('URLFETCH "%s %s" fetchserver=%r %r, retry...', method, url, fetchserver, e)
  970. #logging.info('%s "URL %s %s %s" %s %s', self.address_string(), method, url, self.protocol_version, response.status, response.getheader('Content-Length', '-'))
  971. try:
  972. bufsize = 8192
  973. while True:
  974. data = response.read(bufsize)
  975. if data:
  976. self.wfile.write(data)
  977. #print 'read'
  978. if not data:
  979. self.handle_urlfetch_response_close(fetchserver, response)
  980. response.close()
  981. break
  982. del data
  983. except NetWorkIOError as e:
  984. if e[0] in (errno.ECONNABORTED, errno.EPIPE) or 'bad write retry' in repr(e):
  985. return
  986. def do_METHOD(self):
  987. self.parse_header()
  988. self.body = self.rfile.read(int(self.headers['Content-Length'])) if 'Content-Length' in self.headers else ''
  989. if self.sticky_filter:
  990. action = self.sticky_filter.filter(self)
  991. if action:
  992. return action.pop(0)(*action)
  993. for handler_filter in self.handler_filters:
  994. action = handler_filter.filter(self)
  995. if action:
  996. return action.pop(0)(*action)
  997. class AdvancedProxyHandler(SimpleProxyHandler):
  998. """Advanced Proxy Handler"""
  999. dns_cache = LRUCache(64*1024)
  1000. dns_servers = []
  1001. dns_blacklist = []
  1002. tcp_connection_time = collections.defaultdict(float)
  1003. tcp_connection_time_with_clienthello = collections.defaultdict(float)
  1004. tcp_connection_cache = collections.defaultdict(Queue.PriorityQueue)
  1005. ssl_connection_time = collections.defaultdict(float)
  1006. ssl_connection_cache = collections.defaultdict(Queue.PriorityQueue)
  1007. ssl_connection_good_ipaddrs = {}
  1008. ssl_connection_bad_ipaddrs = {}
  1009. ssl_connection_unknown_ipaddrs = {}
  1010. ssl_connection_keepalive = False
  1011. max_window = 4
  1012. openssl_context = OpenSSL.SSL.Context(OpenSSL.SSL.TLSv1_METHOD)
  1013. def gethostbyname2(self, hostname):
  1014. try:
  1015. iplist = self.dns_cache[hostname]
  1016. except KeyError:
  1017. if re.match(r'^\d+\.\d+\.\d+\.\d+$', hostname) or ':' in hostname:
  1018. iplist = [hostname]
  1019. elif self.dns_servers:
  1020. try:
  1021. record = dnslib_resolve_over_udp(hostname, self.dns_servers, timeout=2, blacklist=self.dns_blacklist)
  1022. except socket.gaierror:
  1023. record = dnslib_resolve_over_tcp(hostname, self.dns_servers, timeout=2, blacklist=self.dns_blacklist)
  1024. iplist = dnslib_record2iplist(record)
  1025. else:
  1026. iplist = socket.gethostbyname_ex(hostname)[-1]
  1027. self.dns_cache[hostname] = iplist
  1028. return iplist
    def create_tcp_connection(self, hostname, port, timeout, **kwargs):
        """Connect to ``hostname:port`` by racing several addresses in parallel.

        Keyword options read here: ``client_hello`` (bytes sent right after
        connecting; the first reply is then awaited), ``cache_key`` (ignored
        when ``client_hello`` is given) and ``max_retry`` (default 5).

        A cached socket younger than 30 seconds is returned first when
        ``cache_key`` is set. Otherwise each round starts one thread per
        candidate address and returns the first socket that succeeds; the
        remaining results are drained by a background thread. If every round
        fails, the last exception received is raised; if no attempt was made
        at all, None is returned implicitly.
        """
        client_hello = kwargs.get('client_hello', None)
        cache_key = kwargs.get('cache_key') if not client_hello else None
        def create_connection(ipaddr, timeout, queobj):
            # Worker: connect to one address and put either the socket or the
            # exception into queobj.
            sock = None
            try:
                # create a ipv4/ipv6 socket object
                sock = socket.socket(socket.AF_INET if ':' not in ipaddr[0] else socket.AF_INET6)
                # set reuseaddr option to avoid 10048 socket error
                sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
                # resize socket recv buffer 8K->32K to improve browser related application performance
                sock.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, 32*1024)
                # disable nagle algorithm to send http request quickly.
                sock.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, True)
                # set a short timeout to trigger timeout retry more quickly.
                sock.settimeout(min(self.connect_timeout, timeout))
                # start connection time record
                start_time = time.time()
                # TCP connect
                sock.connect(ipaddr)
                # record TCP connection time
                self.tcp_connection_time[ipaddr] = time.time() - start_time
                # send client hello and peek server hello
                if client_hello:
                    sock.sendall(client_hello)
                    if gevent and isinstance(sock, gevent.socket.socket):
                        # gevent sockets: the reply is consumed and kept on
                        # the socket as ``sock.data`` instead of being peeked
                        sock.data = data = sock.recv(4096)
                    else:
                        data = sock.recv(4096, socket.MSG_PEEK)
                    if not data:
                        logging.debug('create_tcp_connection %r with client_hello return NULL byte, continue %r', ipaddr, time.time()-start_time)
                        raise socket.timeout('timed out')
                    # record TCP connection time with client hello
                    self.tcp_connection_time_with_clienthello[ipaddr] = time.time() - start_time
                # set timeout
                sock.settimeout(timeout)
                # put tcp socket object to output queobj
                queobj.put(sock)
            except (socket.error, OSError) as e:
                # any socket.error, put Exceptions to output queobj.
                queobj.put(e)
                # reset a large and random timeout to the ipaddr
                self.tcp_connection_time[ipaddr] = self.connect_timeout+random.random()
                # close tcp socket
                if sock:
                    sock.close()
        def close_connection(count, queobj, first_tcp_time):
            # Drain the ``count`` results still pending after a winner was
            # returned: cache sockets that pass the time threshold, close the
            # others.
            for _ in range(count):
                sock = queobj.get()
                tcp_time_threshold = min(1, 1.3 * first_tcp_time)
                if sock and not isinstance(sock, Exception):
                    ipaddr = sock.getpeername()
                    if cache_key and self.tcp_connection_time[ipaddr] < tcp_time_threshold:
                        cache_queue = self.tcp_connection_cache[cache_key]
                        # NOTE(review): an older entry is dropped while the
                        # queue holds FEWER than 8 items -- confirm the
                        # comparison direction is intended.
                        if cache_queue.qsize() < 8:
                            try:
                                _, old_sock = cache_queue.get_nowait()
                                old_sock.close()
                            except Queue.Empty:
                                pass
                        cache_queue.put((time.time(), sock))
                    else:
                        sock.close()
        try:
            # reuse a cached socket younger than 30 seconds, closing stale ones
            while cache_key:
                ctime, sock = self.tcp_connection_cache[cache_key].get_nowait()
                if time.time() - ctime < 30:
                    return sock
                else:
                    sock.close()
        except Queue.Empty:
            pass
        addresses = [(x, port) for x in self.gethostbyname2(hostname)]
        sock = None
        for _ in range(kwargs.get('max_retry', 5)):
            window = min((self.max_window+1)//2, len(addresses))
            # fastest known addresses first
            if client_hello:
                addresses.sort(key=self.tcp_connection_time_with_clienthello.__getitem__)
            else:
                addresses.sort(key=self.tcp_connection_time.__getitem__)
            # the best ``window`` addresses plus ``window`` random picks
            # (the same address may appear twice)
            addrs = addresses[:window] + random.sample(addresses, window)
            queobj = gevent.queue.Queue() if gevent else Queue.Queue()
            for addr in addrs:
                thread.start_new_thread(create_connection, (addr, timeout, queobj))
            for i in range(len(addrs)):
                sock = queobj.get()
                if not isinstance(sock, Exception):
                    # with a cache_key the threshold time passed on is 0
                    first_tcp_time = self.tcp_connection_time[sock.getpeername()] if not cache_key else 0
                    thread.start_new_thread(close_connection, (len(addrs)-i-1, queobj, first_tcp_time))
                    return sock
                elif i == 0:
                    # only output first error
                    logging.warning('create_tcp_connection to %r with %s return %r, try again.', hostname, addrs, sock)
        if isinstance(sock, Exception):
            raise sock
    def create_ssl_connection(self, hostname, port, timeout, **kwargs):
        """Open an SSL connection to ``hostname:port`` by racing several addresses.

        Keyword options read here: ``cache_key`` (enables reuse of cached
        sockets), ``validate`` (enables certificate checks) and ``max_retry``
        (default 10).

        A cached socket younger than 30 seconds is returned first when
        ``cache_key`` is set. Otherwise each round picks candidates from the
        good / bad / unknown address sets, starts one
        ``create_connection_withopenssl`` thread per candidate and returns
        the first connection that completes its handshake. If every round
        fails, the last exception received is raised.
        """
        cache_key = kwargs.get('cache_key')
        validate = kwargs.get('validate')
        def create_connection(ipaddr, timeout, queobj):
            # Worker based on the standard ``ssl`` module.
            # NOTE(review): nothing in this method starts this worker; only
            # create_connection_withopenssl is passed to start_new_thread.
            sock = None
            ssl_sock = None
            try:
                # create a ipv4/ipv6 socket object
                sock = socket.socket(socket.AF_INET if ':' not in ipaddr[0] else socket.AF_INET6)
                # set reuseaddr option to avoid 10048 socket error
                sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
                # resize socket recv buffer 8K->32K to improve browser related application performance
                sock.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, 32*1024)
                # disable nagle algorithm to send http request quickly.
                sock.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, True)
                # set a short timeout to trigger timeout retry more quickly.
                sock.settimeout(min(self.connect_timeout, timeout))
                # pick up the certificate
                if not validate:
                    ssl_sock = ssl.wrap_socket(sock, ssl_version=self.ssl_version, do_handshake_on_connect=False)
                else:
                    ssl_sock = ssl.wrap_socket(sock, ssl_version=self.ssl_version, cert_reqs=ssl.CERT_REQUIRED, ca_certs=os.path.join(os.path.dirname(os.path.abspath(__file__)), 'cacert.pem'), do_handshake_on_connect=False)
                ssl_sock.settimeout(min(self.connect_timeout, timeout))
                # start connection time record
                start_time = time.time()
                # TCP connect
                ssl_sock.connect(ipaddr)
                connected_time = time.time()
                # SSL handshake
                ssl_sock.do_handshake()
                handshaked_time = time.time()
                # record TCP connection time
                self.tcp_connection_time[ipaddr] = ssl_sock.tcp_time = connected_time - start_time
                # record SSL connection time
                self.ssl_connection_time[ipaddr] = ssl_sock.ssl_time = handshaked_time - start_time
                # NOTE(review): this overwrites ssl_sock.ssl_time with the TCP
                # connect time just after it was set to the handshake time.
                ssl_sock.ssl_time = connected_time - start_time
                # sometimes, we want to use raw tcp socket directly(select/epoll), so setattr it to ssl socket.
                ssl_sock.sock = sock
                # remove from bad ipaddrs dict
                self.ssl_connection_bad_ipaddrs.pop(ipaddr, None)
                # add to good ipaddrs dict
                if ipaddr not in self.ssl_connection_good_ipaddrs:
                    self.ssl_connection_good_ipaddrs[ipaddr] = handshaked_time
                # verify SSL certificate.
                if validate and hostname.endswith('.appspot.com'):
                    cert = ssl_sock.getpeercert()
                    orgname = next((v for ((k, v),) in cert['subject'] if k == 'organizationName'))
                    if not orgname.lower().startswith('google '):
                        raise ssl.SSLError("%r certificate organizationName(%r) not startswith 'Google'" % (hostname, orgname))
                # set timeout
                ssl_sock.settimeout(timeout)
                # put ssl socket object to output queobj
                queobj.put(ssl_sock)
            except (socket.error, ssl.SSLError, OSError) as e:
                # any socket.error, put Exceptions to output queobj.
                queobj.put(e)
                # reset a large and random timeout to the ipaddr
                self.ssl_connection_time[ipaddr] = self.connect_timeout + random.random()
                # add to bad ipaddrs dict
                if ipaddr not in self.ssl_connection_bad_ipaddrs:
                    self.ssl_connection_bad_ipaddrs[ipaddr] = time.time()
                # remove from good ipaddrs dict
                self.ssl_connection_good_ipaddrs.pop(ipaddr, None)
                # close ssl socket
                if ssl_sock:
                    ssl_sock.close()
                # close tcp socket
                if sock:
                    sock.close()
        def create_connection_withopenssl(ipaddr, timeout, queobj):
            # Worker used by the rounds below: connect and handshake through
            # SSLConnection with the class-level ``openssl_context``, then put
            # either the connection or the exception into queobj.
            sock = None
            ssl_sock = None
            try:
                # create a ipv4/ipv6 socket object
                sock = socket.socket(socket.AF_INET if ':' not in ipaddr[0] else socket.AF_INET6)
                # set reuseaddr option to avoid 10048 socket error
                sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
                # resize socket recv buffer 8K->32K to improve browser related application performance
                sock.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, 32*1024)
                # disable nagle algorithm to send http request quickly.
                sock.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, True)
                # set a short timeout to trigger timeout retry more quickly.
                sock.settimeout(timeout or self.connect_timeout)
                # pick up the certificate
                # (for *.appspot.com the SNI name sent is mail.google.com)
                server_hostname = b'mail.google.com' if hostname.endswith('.appspot.com') else None
                ssl_sock = SSLConnection(self.openssl_context, sock)
                ssl_sock.set_connect_state()
                if server_hostname and hasattr(ssl_sock, 'set_tlsext_host_name'):
                    ssl_sock.set_tlsext_host_name(server_hostname)
                # start connection time record
                start_time = time.time()
                # TCP connect
                ssl_sock.connect(ipaddr)
                connected_time = time.time()
                # SSL handshake
                ssl_sock.do_handshake()
                handshaked_time = time.time()
                # record TCP connection time
                self.tcp_connection_time[ipaddr] = ssl_sock.tcp_time = connected_time - start_time
                # record SSL connection time
                self.ssl_connection_time[ipaddr] = ssl_sock.ssl_time = handshaked_time - start_time
                # sometimes, we want to use raw tcp socket directly(select/epoll), so setattr it to ssl socket.
                ssl_sock.sock = sock
                # remove from bad ipaddrs dict
                self.ssl_connection_bad_ipaddrs.pop(ipaddr, None)
                # add to good ipaddrs dict
                if ipaddr not in self.ssl_connection_good_ipaddrs:
                    self.ssl_connection_good_ipaddrs[ipaddr] = handshaked_time
                # verify SSL certificate.
                if validate and hostname.endswith('.appspot.com'):
                    cert = ssl_sock.get_peer_certificate()
                    commonname = next((v for k, v in cert.get_subject().get_components() if k == 'CN'))
                    if '.google' not in commonname and not commonname.endswith('.appspot.com'):
                        raise socket.error("Host name '%s' doesn't match certificate host '%s'" % (hostname, commonname))
                # put ssl socket object to output queobj
                queobj.put(ssl_sock)
            except (socket.error, OpenSSL.SSL.Error, OSError) as e:
                # any socket.error, put Exceptions to output queobj.
                queobj.put(e)
                # reset a large and random timeout to the ipaddr
                self.ssl_connection_time[ipaddr] = self.connect_timeout + random.random()
                # add to bad ipaddrs dict
                if ipaddr not in self.ssl_connection_bad_ipaddrs:
                    self.ssl_connection_bad_ipaddrs[ipaddr] = time.time()
                # remove from good ipaddrs dict
                self.ssl_connection_good_ipaddrs.pop(ipaddr, None)
                # close ssl socket
                if ssl_sock:
                    ssl_sock.close()
                # close tcp socket
                if sock:
                    sock.close()
        def close_connection(count, queobj, first_tcp_time, first_ssl_time):
            # Drain the ``count`` results still pending after a winner was
            # returned: cache connections that pass the time threshold, close
            # the others. ``first_tcp_time`` is accepted but not used.
            for _ in range(count):
                sock = queobj.get()
                ssl_time_threshold = min(1, 1.3 * first_ssl_time)
                if sock and not isinstance(sock, Exception):
                    if cache_key and sock.ssl_time < ssl_time_threshold:
                        cache_queue = self.ssl_connection_cache[cache_key]
                        # NOTE(review): an older entry is dropped while the
                        # queue holds FEWER than 8 items -- confirm the
                        # comparison direction is intended.
                        if cache_queue.qsize() < 8:
                            try:
                                _, old_sock = cache_queue.get_nowait()
                                old_sock.close()
                            except Queue.Empty:
                                pass
                        cache_queue.put((time.time(), sock))
                    else:
                        sock.close()
        def reorg_ipaddrs():
            # Move good/bad entries older than five minutes to the unknown
            # set, then log the size of the three sets.
            current_time = time.time()
            for ipaddr, ctime in self.ssl_connection_good_ipaddrs.items():
                if current_time - ctime > 5 * 60:
                    self.ssl_connection_good_ipaddrs.pop(ipaddr, None)
                    self.ssl_connection_unknown_ipaddrs[ipaddr] = ctime
            for ipaddr, ctime in self.ssl_connection_bad_ipaddrs.items():
                if current_time - ctime > 5 * 60:
                    self.ssl_connection_bad_ipaddrs.pop(ipaddr, None)
                    self.ssl_connection_unknown_ipaddrs[ipaddr] = ctime
            logging.info("good_ipaddrs=%d, bad_ipaddrs=%d, unkown_ipaddrs=%d", len(self.ssl_connection_good_ipaddrs), len(self.ssl_connection_bad_ipaddrs), len(self.ssl_connection_unknown_ipaddrs))
        try:
            # reuse a cached connection younger than 30 seconds, closing stale ones
            while cache_key:
                ctime, sock = self.ssl_connection_cache[cache_key].get_nowait()
                if time.time() - ctime < 30:
                    return sock
                else:
                    sock.close()
        except Queue.Empty:
            pass
        addresses = [(x, port) for x in self.gethostbyname2(hostname)]
        sock = None
        for i in range(kwargs.get('max_retry', 10)):
            # the candidate window grows by one with every round
            window = self.max_window + i
            good_addrs = [x for x in addresses if x in self.ssl_connection_good_ipaddrs]
            if len(good_addrs) > window:
                good_addrs = sorted(good_addrs, key=self.ssl_connection_time.get)[:window]
            unkown_ipaddrs = [x for x in addresses if x not in self.ssl_connection_good_ipaddrs and x not in self.ssl_connection_bad_ipaddrs]
            if len(unkown_ipaddrs) > window:
                random.shuffle(unkown_ipaddrs)
                unkown_ipaddrs = unkown_ipaddrs[:window]
            bad_ipaddrs = [x for x in addresses if x in self.ssl_connection_bad_ipaddrs]
            if len(bad_ipaddrs) > window:
                # sorted ascending by the time each address was marked bad
                bad_ipaddrs = sorted(bad_ipaddrs, key=self.ssl_connection_bad_ipaddrs.get)[:max(window, 3*window-len(good_addrs)-len(unkown_ipaddrs))]
            addrs = good_addrs + bad_ipaddrs + unkown_ipaddrs
            queobj = gevent.queue.Queue() if gevent else Queue.Queue()
            for addr in addrs:
                thread.start_new_thread(create_connection_withopenssl, (addr, timeout, queobj))
            # note: this inner loop reuses the name ``i`` of the outer loop
            for i in range(len(addrs)):
                sock = queobj.get()
                if not isinstance(sock, Exception):
                    thread.start_new_thread(close_connection, (len(addrs)-i-1, queobj, sock.tcp_time, sock.ssl_time))
                    return sock
                elif i == 0:
                    # only output first error
                    logging.warning('create_ssl_connection to %r with %s return %r, try again.', hostname, addrs, sock)
            # NOTE(review): the indentation of this call was lost in this copy
            # of the file; it is placed after a fully failed round -- confirm
            # against the upstream source.
            reorg_ipaddrs()
        if isinstance(sock, Exception):
            raise sock
    def create_http_request(self, method, url, headers, body, timeout, max_retry=5, bufsize=8192, crlf=None, validate=None, cache_key=None):
        """Send one HTTP(S) request over a raw socket and return the parsed response.

        ``headers`` is updated in place (``Host`` and, for a non-empty body,
        ``Content-Length`` are filled in when missing). The socket comes from
        ``create_ssl_connection`` for https URLs and from
        ``create_tcp_connection`` otherwise, retried up to ``max_retry``
        times; the last connection error is re-raised.

        With ``crlf`` set on a non-https URL, several throw-away
        ``GET / HTTP/1.1`` requests are sent ahead of the real one and their
        responses are read and discarded first.

        Returns an ``httplib.HTTPResponse`` whose headers have been read, or
        None when reading the throw-away responses failed. Raises TypeError
        when ``body`` is neither bytes nor an object with ``read``.
        """
        scheme, netloc, path, query, _ = urlparse.urlsplit(url)
        if netloc.rfind(':') <= netloc.rfind(']'):
            # no port number
            host = netloc
            port = 443 if scheme == 'https' else 80
        else:
            host, _, port = netloc.rpartition(':')
            port = int(port)
        if query:
            path += '?' + query
        if 'Host' not in headers:
            headers['Host'] = host
        if body and 'Content-Length' not in headers:
            headers['Content-Length'] = str(len(body))
        sock = None
        for i in range(max_retry):
            try:
                create_connection = self.create_ssl_connection if scheme == 'https' else self.create_tcp_connection
                sock = create_connection(host, port, timeout, validate=validate, cache_key=cache_key)
                break
            except StandardError as e:
                logging.exception('create_http_request "%s %s" failed:%s', method, url, e)
                if sock:
                    sock.close()
                if i == max_retry - 1:
                    raise
        request_data = ''
        crlf_counter = 0
        if scheme != 'https' and crlf:
            # build the throw-away requests from a copy of the real headers
            # without Content-Length, Cookie and Host
            fakeheaders = dict((k.title(), v) for k, v in headers.items())
            fakeheaders.pop('Content-Length', None)
            fakeheaders.pop('Cookie', None)
            fakeheaders.pop('Host', None)
            if 'User-Agent' not in fakeheaders:
                fakeheaders['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1878.0 Safari/537.36'
            if 'Accept-Language' not in fakeheaders:
                fakeheaders['Accept-Language'] = 'zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.4'
            if 'Accept' not in fakeheaders:
                fakeheaders['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
            fakeheaders_data = ''.join('%s: %s\r\n' % (k, v) for k, v in fakeheaders.items() if k not in self.skip_headers)
            # at least five throw-away requests and at least 3000 bytes
            while crlf_counter < 5 or len(request_data) < 1500 * 2:
                request_data += 'GET / HTTP/1.1\r\n%s\r\n' % fakeheaders_data
                crlf_counter += 1
            request_data += '\r\n\r\n\r\n'
        request_data += '%s %s %s\r\n' % (method, path, self.protocol_version)
        request_data += ''.join('%s: %s\r\n' % (k.title(), v) for k, v in headers.items() if k.title() not in self.skip_headers)
        request_data += '\r\n'
        if isinstance(body, bytes):
            sock.sendall(request_data.encode() + body)
        elif hasattr(body, 'read'):
            # file-like body: stream it in ``bufsize`` pieces
            sock.sendall(request_data)
            while 1:
                data = body.read(bufsize)
                if not data:
                    break
                sock.sendall(data)
        else:
            raise TypeError('create_http_request(body) must be a string or buffer, not %r' % type(body))
        response = None
        try:
            # read and discard one response per throw-away request
            while crlf_counter:
                if sys.version[:3] == '2.7':
                    response = httplib.HTTPResponse(sock, buffering=False)
                else:
                    response = httplib.HTTPResponse(sock)
                    response.fp.close()
                    response.fp = sock.makefile('rb', 0)
                response.begin()
                response.read()
                response.close()
                crlf_counter -= 1
        except StandardError as e:
            logging.exception('crlf skip read host=%r path=%r error: %r', headers.get('Host'), path, e)
            if response:
                if response.fp and response.fp._sock:
                    response.fp._sock.close()
                response.close()
            if sock:
                sock.close()
            return None
        if sys.version[:3] == '2.7':
            response = httplib.HTTPResponse(sock, buffering=True)
        else:
            response = httplib.HTTPResponse(sock)
            response.fp.close()
            response.fp = sock.makefile('rb')
        response.begin()
        if self.ssl_connection_keepalive and scheme == 'https' and cache_key:
            # remember the socket on the response; it is picked up again by
            # handle_urlfetch_response_close
            response.cache_key = cache_key
            response.cache_sock = response.fp._sock
        return response
  1413. def handle_urlfetch_response_close(self, fetchserver, response):
  1414. cache_sock = getattr(response, 'cache_sock', None)
  1415. if cache_sock:
  1416. if self.scheme == 'https':
  1417. self.ssl_connection_cache[response.cache_key].put((time.time(), cache_sock))
  1418. else:
  1419. cache_sock.close()
  1420. del response.cache_sock
  1421. def handle_urlfetch_error(self, fetchserver, response):
  1422. pass
  1423. class Common(object):
  1424. """Global Config Object"""
  1425. ENV_CONFIG_PREFIX = 'GOAGENT_'
  1426. def __init__(self):
  1427. """load config from proxy.ini"""
  1428. ConfigParser.RawConfigParser.OPTCRE = re.compile(r'(?P<option>[^=\s][^=]*)\s*(?P<vi>[=])\s*(?P<value>.*)$')
  1429. self.CONFIG = ConfigParser.ConfigParser()
  1430. self.CONFIG_FILENAME = os.path.splitext(os.path.abspath(__file__))[0]+'.ini'
  1431. self.CONFIG_USER_FILENAME = re.sub(r'\.ini$', '.user.ini', self.CONFIG_FILENAME)
  1432. self.CONFIG_MY_FILENAME = re.sub(r'\.ini$', '.my.ini', self.CONFIG_FILENAME)
  1433. self.CONFIG.read([self.CONFIG_FILENAME, self.CONFIG_USER_FILENAME, self.CONFIG_MY_FILENAME])
  1434. for key, value in os.environ.items():
  1435. m = re.match(r'^%s([A-Z]+)_([A-Z\_\-]+)$' % self.ENV_CONFIG_PREFIX, key)
  1436. if m:
  1437. self.CONFIG.set(m.group(1).lower(), m.group(2).lower(), value)
  1438. self.LISTEN_IP = self.CONFIG.get('listen', 'ip')
  1439. self.LISTEN_PORT = self.CONFIG.getint('listen', 'port')
  1440. self.LISTEN_USERNAME = self.CONFIG.get('listen', 'username') if self.CONFIG.has_option('listen', 'username') else ''
  1441. self.LISTEN_PASSWORD = self.CONFIG.get('listen', 'password') if self.CONFIG.has_option('listen', 'password') else ''
  1442. self.LISTEN_VISIBLE = self.CONFIG.getint('listen', 'visible')
  1443. self.LISTEN_DEBUGINFO = self.CONFIG.getint('listen', 'debuginfo')
  1444. self.GAE_MODE = self.CONFIG.get('gae', 'mode')
  1445. self.GAE_PROFILE = self.CONFIG.get('gae', 'profile').strip()
  1446. self.GAE_WINDOW = self.CONFIG.getint('gae', 'window')
  1447. self.GAE_KEEPALIVE = self.CONFIG.getint('gae', 'keepalive') if self.CONFIG.has_option('gae', 'keepalive') else 0
  1448. self.GAE_SSLVERSION = self.CONFIG.get('gae', 'sslversion')
  1449. if self.GAE_PROFILE == 'auto':
  1450. try:
  1451. socket.create_connection(('2001:4860:4860::8888', 53), timeout=1).close()
  1452. logging.info('Use profile ipv6')
  1453. self.GAE_PROFILE = 'ipv6'
  1454. except socket.error as e:
  1455. logging.info('Fail try profile ipv6 %r, fallback ipv4', e)
  1456. self.GAE_PROFILE = 'ipv4'
  1457. hosts_section, http_section = '%s/hosts' % self.GAE_PROFILE, '%s/http' % self.GAE_PROFILE
  1458. if 'USERDNSDOMAIN' in os.environ and re.match(r'^\w+\.\w+$', os.environ['USERDNSDOMAIN']):
  1459. self.CONFIG.set(hosts_section, '.' + os.environ['USERDNSDOMAIN'], '')
  1460. self.HOST_MAP = collections.OrderedDict((k, v or k) for k, v in self.CONFIG.items(hosts_section) if '\\' not in k and ':' not in k and not k.startswith('.'))
  1461. self.HOST_POSTFIX_MAP = collections.OrderedDict((k, v) for k, v in self.CONFIG.items(hosts_section) if '\\' not in k and ':' not in k and k.startswith('.'))
  1462. self.HOST_POSTFIX_ENDSWITH = tuple(self.HOST_POSTFIX_MAP)
  1463. self.HOSTPORT_MAP = collections.OrderedDict((k, v) for k, v in self.CONFIG.items(hosts_section) if ':' in k and not k.startswith('.'))
  1464. self.HOSTPORT_POSTFIX_MAP = collections.OrderedDict((k, v) for k, v in self.CONFIG.items(hosts_section) if ':' in k and k.startswith('.'))
  1465. self.HOSTPORT_POSTFIX_ENDSWITH = tuple(self.HOSTPORT_POSTFIX_MAP)
  1466. self.URLRE_MAP = collections.OrderedDict((re.compile(k).match, v) for k, v in self.CONFIG.items(hosts_section) if '\\' in k)
  1467. self.IPLIST_MAP = collections.OrderedDict((k, v.split('|')) for k, v in self.CONFIG.items('iplist'))
  1468. self.IPLIST_MAP.update((k, [k]) for k, v in self.HOST_MAP.items() if k == v)
  1469. self.FETCHMAX_LOCAL = self.CONFIG.getint('fetchmax', 'local') if self.CONFIG.get('fetchmax', 'local') else 3
  1470. self.FETCHMAX_SERVER = self.CONFIG.get('fetchmax', 'server')
  1471. self.DNS_ENABLE = self.CONFIG.getint('dns', 'enable')
  1472. self.DNS_LISTEN = self.CONFIG.get('dns', 'listen')
  1473. self.DNS_SERVERS = self.CONFIG.get('dns', 'servers').split('|')
  1474. self.DNS_BLACKLIST = set(self.CONFIG.get('dns', 'blacklist').split('|'))
  1475. self.DNS_TCPOVER = tuple(self.CONFIG.get('dns', 'tcpover').split('|')) if self.CONFIG.get('dns', 'tcpover').strip() else tuple()
  1476. self.USERAGENT_ENABLE = self.CONFIG.getint('useragent', 'enable')
  1477. self.USERAGENT_STRING = self.CONFIG.get('useragent', 'string')
  1478. def resolve_iplist(self):
  1479. def do_resolve(host, dnsservers, queue):
  1480. iplist = []
  1481. for dnslib_resolve in (dnslib_resolve_over_tcp,):
  1482. try:
  1483. iplist += dnslib_record2iplist(dnslib_resolve_over_udp(host, dnsservers, timeout=4, blacklist=self.DNS_BLACKLIST))
  1484. except (socket.error, OSError) as e:
  1485. logging.debug('%r remote host=%r failed: %s', dnslib_resolve, host, e)
  1486. queue.put((host, dnsservers, iplist))
  1487. # https://support.google.com/websearch/answer/186669?hl=zh-Hans
  1488. google_blacklist = ['216.239.32.20'] + list(self.DNS_BLACKLIST)
  1489. for name, need_resolve_hosts in list(self.IPLIST_MAP.items()):
  1490. if all(re.match(r'\d+\.\d+\.\d+\.\d+', x) or ':' in x for x in need_resolve_hosts):
  1491. continue
  1492. need_resolve_remote = [x for x in need_resolve_hosts if ':' not in x and not re.match(r'\d+\.\d+\.\d+\.\d+', x)]
  1493. resolved_iplist = [x for x in need_resolve_hosts if x not in need_resolve_remote]
  1494. result_queue = Queue.Queue()
  1495. for host in need_resolve_remote:
  1496. for dnsserver in self.DNS_SERVERS:
  1497. logging.debug('resolve remote host=%r from dnsserver=%r', host, dnsserver)
  1498. thread.start_new_thread(do_resolve, (host, [dnsserver], result_queue))
  1499. for _ in xrange(len(self.DNS_SERVERS) * len(need_resolve_remote)):
  1500. try:
  1501. host, dnsservers, iplist = result_queue.get(timeout=5)
  1502. resolved_iplist += iplist or []
  1503. logging.debug('resolve remote host=%r from dnsservers=%s return iplist=%s', host, dnsservers, iplist)
  1504. except Queue.Empty:
  1505. logging.warn('resolve remote timeout, try resolve local')
  1506. resolved_iplist += sum([socket.gethostbyname_ex(x)[-1] for x in need_resolve_remote], [])
  1507. break
  1508. if name.startswith('google_') and name not in ('google_cn', 'google_hk') and resolved_iplist:
  1509. iplist_prefix = re.split(r'[\.:]', resolved_iplist[0])[0]
  1510. resolved_iplist = list(set(x for x in resolved_iplist if x.startswith(iplist_prefix)))
  1511. else:
  1512. resolved_iplist = list(set(resolved_iplist))
  1513. if name.startswith('google_'):
  1514. resolved_iplist = list(set(resolved_iplist) - set(google_blacklist))
  1515. if len(resolved_iplist) == 0:
  1516. logging.error('resolve %s host return empty! please retry!', name)
  1517. sys.exit(-1)
  1518. logging.info('resolve name=%s host to iplist=%r', name, resolved_iplist)
  1519. common.IPLIST_MAP[name] = resolved_iplist
  1520. def info(self):
  1521. info = ''
  1522. info += '------------------------------------------------------\n'
  1523. info += 'GreatAgent SimpleProxy Version : %s (python/%s %spyopenssl/%s)\n' % (__version__, sys.version[:5], gevent and 'gevent/%s ' % gevent.__version__ or '', getattr(OpenSSL, '__version__', 'Disabled'))
  1524. info += 'Listen Address : %s:%d\n' % (self.LISTEN_IP, self.LISTEN_PORT)
  1525. info += 'Debug INFO : %s\n' % self.LISTEN_DEBUGINFO if self.LISTEN_DEBUGINFO else ''
  1526. info += 'GOOGLE Mode : %s\n' % self.GAE_MODE
  1527. info += 'GOOGLE Profile : %s\n' % self.GAE_PROFILE if self.GAE_PROFILE else ''
  1528. info += '------------------------------------------------------\n'
  1529. return info
# Module-wide configuration instance; constructing it reads the .ini files
# right here at import time.  The filters and handlers below read their
# settings through this `common` name.
common = Common()
  1531. def message_html(title, banner, detail=''):
  1532. MESSAGE_TEMPLATE = '''
  1533. <html><head>
  1534. <meta http-equiv="content-type" content="text/html;charset=utf-8">
  1535. <title>$title</title>
  1536. <style><!--
  1537. body {font-family: arial,sans-serif}
  1538. div.nav {margin-top: 1ex}
  1539. div.nav A {font-size: 10pt; font-family: arial,sans-serif}
  1540. span.nav {font-size: 10pt; font-family: arial,sans-serif; font-weight: bold}
  1541. div.nav A,span.big {font-size: 12pt; color: #0000cc}
  1542. div.nav A {font-size: 10pt; color: black}
  1543. A.l:link {color: #6f6f6f}
  1544. A.u:link {color: green}
  1545. //--></style>
  1546. </head>
  1547. <body text=#000000 bgcolor=#ffffff>
  1548. <table border=0 cellpadding=2 cellspacing=0 width=100%>
  1549. <tr><td bgcolor=#3366cc><font face=arial,sans-serif color=#ffffff><b>Message From LocalProxy</b></td></tr>
  1550. <tr><td> </td></tr></table>
  1551. <blockquote>
  1552. <H1>$banner</H1>
  1553. $detail
  1554. <p>
  1555. </blockquote>
  1556. <table width=100% cellpadding=0 cellspacing=0><tr><td bgcolor=#3366cc><img alt="" width=1 height=4></td></tr></table>
  1557. </body></html>
  1558. '''
  1559. return string.Template(MESSAGE_TEMPLATE).substitute(title=title, banner=banner, detail=detail)
  1560. class LocalProxyServer(SocketServer.ThreadingTCPServer):
  1561. """Local Proxy Server"""
  1562. allow_reuse_address = True
  1563. daemon_threads = True
  1564. def close_request(self, request):
  1565. try:
  1566. request.close()
  1567. except StandardError:
  1568. pass
  1569. def finish_request(self, request, client_address):
  1570. try:
  1571. self.RequestHandlerClass(request, client_address, self)
  1572. except NetWorkIOError as e:
  1573. if e[0] not in (errno.ECONNABORTED, errno.ECONNRESET, errno.EPIPE):
  1574. raise
  1575. def handle_error(self, *args):
  1576. """make ThreadingTCPServer happy"""
  1577. exc_info = sys.exc_info()
  1578. error = exc_info and len(exc_info) and exc_info[1]
  1579. if isinstance(error, NetWorkIOError) and len(error.args) > 1 and 'bad write retry' in error.args[1]:
  1580. exc_info = error = None
  1581. else:
  1582. del exc_info, error
  1583. SocketServer.ThreadingTCPServer.handle_error(self, *args)
  1584. class UserAgentFilter(BaseProxyHandlerFilter):
  1585. """user agent filter"""
  1586. def filter(self, handler):
  1587. if common.USERAGENT_ENABLE:
  1588. handler.headers['User-Agent'] = common.USERAGENT_STRING
  1589. class FakeHttpsFilter(BaseProxyHandlerFilter):
  1590. """fake https filter"""
  1591. def filter(self, handler):
  1592. if handler.command == 'CONNECT':
  1593. return [handler.STRIP, True, None]
  1594. class HostsFilter(BaseProxyHandlerFilter):
  1595. """force https filter"""
  1596. def filter(self, handler):
  1597. host, port = handler.host, handler.port
  1598. hostport = handler.path if handler.command == 'CONNECT' else '%s:%d' % (host, port)
  1599. hostname = ''
  1600. if host in common.HOST_MAP:
  1601. hostname = common.HOST_MAP[host] or host
  1602. elif host.endswith(common.HOST_POSTFIX_ENDSWITH):
  1603. hostname = next(common.HOST_POSTFIX_MAP[x] for x in common.HOST_POSTFIX_MAP if host.endswith(x)) or host
  1604. common.HOST_MAP[host] = hostname
  1605. if hostport in common.HOSTPORT_MAP:
  1606. hostname = common.HOSTPORT_MAP[hostport] or host
  1607. elif hostport.endswith(common.HOSTPORT_POSTFIX_ENDSWITH):
  1608. hostname = next(common.HOSTPORT_POSTFIX_MAP[x] for x in common.HOSTPORT_POSTFIX_MAP if hostport.endswith(x)) or host
  1609. common.HOSTPORT_MAP[hostport] = hostname
  1610. if handler.command != 'CONNECT' and common.URLRE_MAP:
  1611. try:
  1612. hostname = next(common.URLRE_MAP[x] for x in common.URLRE_MAP if x(handler.path)) or host
  1613. except StopIteration:
  1614. pass
  1615. if not hostname:
  1616. return None
  1617. elif hostname in common.IPLIST_MAP:
  1618. handler.dns_cache[host] = common.IPLIST_MAP[hostname]
  1619. elif hostname == host and host.endswith(common.DNS_TCPOVER) and host not in handler.dns_cache:
  1620. try:
  1621. iplist = dnslib_record2iplist(dnslib_resolve_over_tcp(host, handler.dns_servers, timeout=4, blacklist=handler.dns_blacklist))
  1622. logging.info('HostsFilter dnslib_resolve_over_tcp %r with %r return %s', host, handler.dns_servers, iplist)
  1623. handler.dns_cache[host] = iplist
  1624. except socket.error as e:
  1625. logging.warning('HostsFilter dnslib_resolve_over_tcp %r with %r failed: %r', host, handler.dns_servers, e)
  1626. elif re.match(r'^\d+\.\d+\.\d+\.\d+$', hostname) or ':' in hostname:
  1627. handler.dns_cache[host] = [hostname]
  1628. cache_key = '%s:%s' % (hostname, port)
  1629. if handler.command == 'CONNECT':
  1630. return [handler.FORWARD, host, port, handler.connect_timeout, {'cache_key': cache_key}]
  1631. else:
  1632. return [handler.DIRECT, {'cache_key': cache_key}]
  1633. class GAEFetchFilter(BaseProxyHandlerFilter):
  1634. """force https filter"""
  1635. def filter(self, handler):
  1636. """https://developers.google.com/appengine/docs/python/urlfetch/"""
  1637. return [handler.URLFETCH, '', common.FETCHMAX_LOCAL, {}]
  1638. class GAEProxyHandler(AdvancedProxyHandler):
  1639. """GAE Proxy Handler"""
  1640. handler_filters = [UserAgentFilter(), FakeHttpsFilter(),HostsFilter(), GAEFetchFilter()]
  1641. def first_run(self):
  1642. """GAEProxyHandler setup, init domain/iplist map"""
  1643. logging.info('resolve common.IPLIST_MAP names=%s to iplist', list(common.IPLIST_MAP))
  1644. common.resolve_iplist()
  1645. server = LocalProxyServer((common.LISTEN_IP, common.LISTEN_PORT), GAEProxyHandler)