
/lib/request/basic.py

https://github.com/akoserwal/sqlmap
#!/usr/bin/env python

"""
Copyright (c) 2006-2014 sqlmap developers (http://sqlmap.org/)
See the file 'doc/COPYING' for copying permission
"""

import codecs
import gzip
import logging
import re
import StringIO
import struct
import zlib

from lib.core.common import extractErrorMessage
from lib.core.common import extractRegexResult
from lib.core.common import getPublicTypeMembers
from lib.core.common import getUnicode
from lib.core.common import readInput
from lib.core.common import resetCookieJar
from lib.core.common import singleTimeLogMessage
from lib.core.common import singleTimeWarnMessage
from lib.core.data import conf
from lib.core.data import kb
from lib.core.data import logger
from lib.core.enums import HTTP_HEADER
from lib.core.enums import PLACE
from lib.core.exception import SqlmapCompressionException
from lib.core.settings import DEFAULT_COOKIE_DELIMITER
from lib.core.settings import EVENTVALIDATION_REGEX
from lib.core.settings import MAX_CONNECTION_TOTAL_SIZE
from lib.core.settings import ML
from lib.core.settings import META_CHARSET_REGEX
from lib.core.settings import PARSE_HEADERS_LIMIT
from lib.core.settings import VIEWSTATE_REGEX
from lib.parse.headers import headersParser
from lib.parse.html import htmlParser
from lib.utils.htmlentities import htmlEntities
from thirdparty.chardet import detect

def forgeHeaders(items=None):
    """
    Prepare HTTP Cookie, HTTP User-Agent and HTTP Referer headers to use when performing
    the HTTP requests
    """

    items = items or {}

    for _ in items.keys():
        if items[_] is None:
            del items[_]

    headers = dict(conf.httpHeaders)
    headers.update(items or {})

    class _str(str):
        def capitalize(self):
            return _str(self)

        def title(self):
            return _str(self)

    _ = headers
    headers = {}

    for key, value in _.items():
        success = False

        if key.upper() not in (_.upper() for _ in getPublicTypeMembers(HTTP_HEADER, True)):
            try:
                headers[_str(key)] = value  # dirty hack for http://bugs.python.org/issue12455
            except UnicodeEncodeError:  # don't do the hack on non-ASCII header names (they have to be properly encoded later on)
                pass
            else:
                success = True

        if not success:
            key = '-'.join(_.capitalize() for _ in key.split('-'))
            headers[key] = value

    if conf.cj:
        if HTTP_HEADER.COOKIE in headers:
            for cookie in conf.cj:
                if cookie.domain_specified and not conf.hostname.endswith(cookie.domain):
                    continue

                if ("%s=" % cookie.name) in headers[HTTP_HEADER.COOKIE]:
                    if conf.loadCookies:
                        conf.httpHeaders = filter(None, ((item if item[0] != HTTP_HEADER.COOKIE else None) for item in conf.httpHeaders))
                    elif kb.mergeCookies is None:
                        message = "you provided an HTTP %s header value. " % HTTP_HEADER.COOKIE
                        message += "The target URL provided its own cookies within "
                        message += "the HTTP %s header which intersect with yours. " % HTTP_HEADER.SET_COOKIE
                        message += "Do you want to merge them in further requests? [Y/n] "
                        _ = readInput(message, default="Y")
                        kb.mergeCookies = not _ or _[0] in ("y", "Y")

                    if kb.mergeCookies:
                        _ = lambda x: re.sub("(?i)%s=[^%s]+" % (cookie.name, conf.cDel or DEFAULT_COOKIE_DELIMITER), "%s=%s" % (cookie.name, cookie.value), x)
                        headers[HTTP_HEADER.COOKIE] = _(headers[HTTP_HEADER.COOKIE])

                        if PLACE.COOKIE in conf.parameters:
                            conf.parameters[PLACE.COOKIE] = _(conf.parameters[PLACE.COOKIE])

                        conf.httpHeaders = [(item[0], item[1] if item[0] != HTTP_HEADER.COOKIE else _(item[1])) for item in conf.httpHeaders]
                elif not kb.testMode:
                    headers[HTTP_HEADER.COOKIE] += "%s %s=%s" % (conf.cDel or DEFAULT_COOKIE_DELIMITER, cookie.name, cookie.value)

        if kb.testMode:
            resetCookieJar(conf.cj)

    return headers

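# Usage sketch (not part of the original module; forgeHeaders() reads and
# mutates sqlmap's global conf/kb state, so the outcome below is illustrative):
#
#     headers = forgeHeaders({HTTP_HEADER.USER_AGENT: "sqlmap/1.0-dev"})
#     # -> dict built from conf.httpHeaders with User-Agent overridden,
#     #    non-standard header names keeping their original casing, and
#     #    cookies from conf.cj merged into the Cookie header when applicable
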
def parseResponse(page, headers):
    """
    @param page: the page to parse to feed the knowledge base htmlFp
    (back-end DBMS fingerprint based upon DBMS error messages returned
    through the web application) list and absFilePaths (absolute file
    paths) set.
    """

    if headers:
        headersParser(headers)

    if page:
        htmlParser(page)

def checkCharEncoding(encoding, warn=True):
    """
    Checks encoding name, repairs common misspellings and adjusts to
    proper namings used in codecs module

    >>> checkCharEncoding('iso-8858', False)
    'iso8859-1'
    >>> checkCharEncoding('en_us', False)
    'utf8'
    """

    if encoding:
        encoding = encoding.lower()
    else:
        return encoding

    # Reference: http://www.destructor.de/charsets/index.htm
    translate = {"windows-874": "iso-8859-11", "en_us": "utf8", "macintosh": "iso-8859-1", "euc_tw": "big5_tw", "th": "tis-620", "unicode": "utf8", "utc8": "utf8", "ebcdic": "ebcdic-cp-be", "iso-8859": "iso8859-1"}

    for delimiter in (';', ',', '('):
        if delimiter in encoding:
            encoding = encoding[:encoding.find(delimiter)].strip()

    # popular typos/errors
    if "8858" in encoding:
        encoding = encoding.replace("8858", "8859")  # iso-8858 -> iso-8859
    elif "8559" in encoding:
        encoding = encoding.replace("8559", "8859")  # iso-8559 -> iso-8859
    elif "5889" in encoding:
        encoding = encoding.replace("5889", "8859")  # iso-5889 -> iso-8859
    elif "5589" in encoding:
        encoding = encoding.replace("5589", "8859")  # iso-5589 -> iso-8859
    elif "2313" in encoding:
        encoding = encoding.replace("2313", "2312")  # gb2313 -> gb2312
    elif encoding.startswith("x-"):
        encoding = encoding[len("x-"):]  # x-euc-kr -> euc-kr / x-mac-turkish -> mac-turkish
    elif "windows-cp" in encoding:
        encoding = encoding.replace("windows-cp", "windows")  # windows-cp-1254 -> windows-1254

    # name adjustment for compatibility
    if encoding.startswith("8859"):
        encoding = "iso-%s" % encoding
    elif encoding.startswith("cp-"):
        encoding = "cp%s" % encoding[3:]
    elif encoding.startswith("euc-"):
        encoding = "euc_%s" % encoding[4:]
    elif encoding.startswith("windows") and not encoding.startswith("windows-"):
        encoding = "windows-%s" % encoding[7:]
    elif encoding.find("iso-88") > 0:
        encoding = encoding[encoding.find("iso-88"):]
    elif encoding.startswith("is0-"):
        encoding = "iso%s" % encoding[4:]
    elif encoding.find("ascii") > 0:
        encoding = "ascii"
    elif encoding.find("utf8") > 0:
        encoding = "utf8"

    # Reference: http://philip.html5.org/data/charsets-2.html
    if encoding in translate:
        encoding = translate[encoding]
    elif encoding in ("null", "{charset}", "*"):
        return None

    # Reference: http://www.iana.org/assignments/character-sets
    # Reference: http://docs.python.org/library/codecs.html
    try:
        codecs.lookup(encoding)
    except LookupError:
        if warn:
            warnMsg = "unknown web page charset '%s'. " % encoding
            warnMsg += "Please report by e-mail to %s." % ML
            singleTimeLogMessage(warnMsg, logging.WARN, encoding)
        encoding = None

    return encoding

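# A few more illustrative repairs (hedged additions, traced through the
# typo/alias tables above rather than taken from the original doctests):
#
#     >>> checkCharEncoding('windows-cp1251', False)
#     'windows-1251'
#     >>> checkCharEncoding('utf-8;q=0.9', False)
#     'utf-8'
#     >>> checkCharEncoding('null', False) is None
#     True
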
def getHeuristicCharEncoding(page):
    """
    Returns page encoding charset detected by usage of heuristics
    Reference: http://chardet.feedparser.org/docs/
    """

    retVal = detect(page)["encoding"]

    if retVal:
        infoMsg = "heuristics detected web page charset '%s'" % retVal
        singleTimeLogMessage(infoMsg, logging.INFO, retVal)

    return retVal

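# Usage sketch (illustrative; detect() is the bundled chardet detector and
# returns a dict with "encoding" and "confidence" keys, e.g. for UTF-8
# encoded Cyrillic bytes):
#
#     >>> detect("\xd0\xbf\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82")["encoding"]
#     'utf-8'
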
def decodePage(page, contentEncoding, contentType):
    """
    Decode compressed/charset HTTP response
    """

    if not page or (conf.nullConnection and len(page) < 2):
        return getUnicode(page)

    if isinstance(contentEncoding, basestring) and contentEncoding.lower() in ("gzip", "x-gzip", "deflate"):
        if not kb.pageCompress:
            return None

        try:
            if contentEncoding.lower() == "deflate":
                data = StringIO.StringIO(zlib.decompress(page, -15))  # Reference: http://stackoverflow.com/questions/1089662/python-inflate-and-deflate-implementations
            else:
                data = gzip.GzipFile("", "rb", 9, StringIO.StringIO(page))
                size = struct.unpack("<l", page[-4:])[0]  # Reference: http://pydoc.org/get.cgi/usr/local/lib/python2.5/gzip.py
                if size > MAX_CONNECTION_TOTAL_SIZE:
                    raise Exception("size too large")

            page = data.read()
        except Exception, msg:
            errMsg = "detected invalid data for declared content "
            errMsg += "encoding '%s' ('%s')" % (contentEncoding, msg)
            singleTimeLogMessage(errMsg, logging.ERROR)

            warnMsg = "turning off page compression"
            singleTimeWarnMessage(warnMsg)

            kb.pageCompress = False
            raise SqlmapCompressionException

    if not conf.charset:
        httpCharset, metaCharset = None, None

        # Reference: http://stackoverflow.com/questions/1020892/python-urllib2-read-to-unicode
        if contentType and (contentType.find("charset=") != -1):
            httpCharset = checkCharEncoding(contentType.split("charset=")[-1])

        metaCharset = checkCharEncoding(extractRegexResult(META_CHARSET_REGEX, page))

        if (any((httpCharset, metaCharset)) and not all((httpCharset, metaCharset)))\
            or (httpCharset == metaCharset and all((httpCharset, metaCharset))):
            kb.pageEncoding = httpCharset or metaCharset
            debugMsg = "declared web page charset '%s'" % kb.pageEncoding
            singleTimeLogMessage(debugMsg, logging.DEBUG, debugMsg)
        else:
            kb.pageEncoding = None
    else:
        kb.pageEncoding = conf.charset

    # can't do for all responses because we need to support binary files too
    if contentType and not isinstance(page, unicode) and "text/" in contentType.lower():
        # e.g. &#195;&#235;&#224;&#226;&#224;
        if "&#" in page:
            page = re.sub(r"&#(\d{1,3});", lambda _: chr(int(_.group(1))) if int(_.group(1)) < 256 else _.group(0), page)

        # e.g. %20%28%29
        if "%" in page:
            page = re.sub(r"%([0-9a-fA-F]{2})", lambda _: _.group(1).decode("hex"), page)

        # e.g. &amp;
        page = re.sub(r"&([^;]+);", lambda _: chr(htmlEntities[_.group(1)]) if htmlEntities.get(_.group(1), 256) < 256 else _.group(0), page)

        kb.pageEncoding = kb.pageEncoding or checkCharEncoding(getHeuristicCharEncoding(page))
        page = getUnicode(page, kb.pageEncoding)

        # e.g. &#8217;&#8230;&#8482;
        if "&#" in page:
            def _(match):
                retVal = match.group(0)
                try:
                    retVal = unichr(int(match.group(1)))
                except ValueError:
                    pass
                return retVal
            page = re.sub(r"&#(\d+);", _, page)

        # e.g. &zeta;
        page = re.sub(r"&([^;]+);", lambda _: unichr(htmlEntities[_.group(1)]) if htmlEntities.get(_.group(1), 0) > 255 else _.group(0), page)

    return page

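# The decompression branch above, in isolation (hedged sketch in the same
# Python 2 style as this module; 'raw' stands for any compressed body):
#
#     import gzip, zlib, StringIO
#
#     def _decompress(raw, method):
#         if method == "deflate":
#             # raw DEFLATE stream, i.e. no zlib header (hence wbits=-15)
#             return zlib.decompress(raw, -15)
#         # gzip/x-gzip; note that the last four bytes of a gzip stream
#         # hold the uncompressed size (ISIZE), which decodePage() checks
#         # against MAX_CONNECTION_TOTAL_SIZE before reading
#         return gzip.GzipFile(fileobj=StringIO.StringIO(raw)).read()
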
def processResponse(page, responseHeaders):
    kb.processResponseCounter += 1

    parseResponse(page, responseHeaders if kb.processResponseCounter < PARSE_HEADERS_LIMIT else None)

    if conf.parseErrors:
        msg = extractErrorMessage(page)

        if msg:
            logger.warning("parsed DBMS error message: '%s'" % msg)

    if kb.originalPage is None:
        for regex in (EVENTVALIDATION_REGEX, VIEWSTATE_REGEX):
            match = re.search(regex, page)
            if match and PLACE.POST in conf.parameters:
                name, value = match.groups()
                if PLACE.POST in conf.paramDict and name in conf.paramDict[PLACE.POST]:
                    if conf.paramDict[PLACE.POST][name] in page:
                        continue
                    conf.paramDict[PLACE.POST][name] = value
                conf.parameters[PLACE.POST] = re.sub("(?i)(%s=)[^&]+" % name, r"\g<1>%s" % value, conf.parameters[PLACE.POST])
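
# The POST-token refresh above boils down to one substitution per token
# (hedged sketch with illustrative values):
#
#     >>> re.sub("(?i)(%s=)[^&]+" % "__VIEWSTATE", r"\g<1>%s" % "dDwtNTc4", "id=1&__VIEWSTATE=stale")
#     'id=1&__VIEWSTATE=dDwtNTc4'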