PageRenderTime 38ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/nselib/http.lua

https://github.com/prakashgamit/nmap
Lua | 2631 lines | 2341 code | 60 blank | 230 comment | 100 complexity | 3ff393fff2b09af3b55b259727e3d697 MD5 | raw file
Possible License(s): BSD-3-Clause, GPL-2.0, LGPL-2.0, LGPL-2.1
  1. ---Implements the HTTP client protocol in a standard form that Nmap scripts can
  2. -- take advantage of.
  3. --
  4. -- Because HTTP has so many uses, there are a number of interfaces to this library.
  5. -- The most obvious and common ones are simply <code>get</code>, <code>post</code>,
  6. -- and <code>head</code>; or, if more control is required, <code>generic_request</code>
  7. -- can be used. These functions do what one would expect. The <code>get_url</code>
  8. -- helper function can be used to parse and retrieve a full URL.
  9. --
  10. -- These functions return a table of values, including:
  11. -- * <code>status-line</code> - A string representing the status, such as "HTTP/1.1 200 OK". In case of an error, a description will be provided in this line.
  12. -- * <code>status</code>: The HTTP status value; for example, "200". If an error occurs during a request, then this value is going to be nil.
  13. -- * <code>header</code> - An associative array representing the header. Keys are all lowercase, and standard headers, such as 'date', 'content-length', etc. will typically be present.
  14. -- * <code>rawheader</code> - A numbered array of the headers, exactly as the server sent them. While header['content-type'] might be 'text/html', rawheader[3] might be 'Content-type: text/html'.
  15. -- * <code>cookies</code> - A numbered array of the cookies the server sent. Each cookie is a table with the following keys: <code>name</code>, <code>value</code>, <code>path</code>, <code>domain</code>, and <code>expires</code>.
  16. -- * <code>body</code> - The full body, as returned by the server.
  17. --
  18. -- If a script is planning on making a lot of requests, the pipelining functions can
  19. -- be helpful. <code>pipeline_add</code> queues requests in a table, and
  20. -- <code>pipeline</code> performs the requests, returning the results as an array,
  21. -- with the responses in the same order as the queries were added. As a simple example:
  22. --<code>
  23. -- -- Start by defining the 'all' variable as nil
  24. -- local all = nil
  25. --
  26. -- -- Add two 'GET' requests and one 'HEAD' to the queue. These requests are not performed
  27. -- -- yet. The second parameter represents the 'options' table, which we don't need.
  28. -- all = http.pipeline_add('/book', nil, all)
  29. -- all = http.pipeline_add('/test', nil, all)
  30. -- all = http.pipeline_add('/monkeys', nil, all)
  31. --
  32. -- -- Perform all three requests with as much parallelism as Nmap is able to provide
  33. -- local results = http.pipeline('nmap.org', 80, all)
  34. --</code>
  35. --
  36. -- At this point, <code>results</code> is an array with three elements. Each element
  37. -- is a table containing the HTTP result, as discussed above.
  38. --
  39. -- One more interface provided by the HTTP library helps scripts determine whether or not
  40. -- a page exists. The <code>identify_404</code> function will try several URLs on the
  41. -- server to determine what the server's 404 pages look like. It will attempt to identify
  42. -- customized 404 pages that may not return the actual status code 404. If successful,
  43. -- the function <code>page_exists</code> can then be used to determine whether or not
  44. -- a page existed.
  45. --
  46. -- Some other miscellaneous functions that can come in handy are <code>response_contains</code>,
  47. -- <code>can_use_head</code>, and <code>save_path</code>. See the appropriate documentation
  48. -- for them.
  49. --
  50. -- The response to each function is typically a table with the following keys:
  51. -- <code>status-line</code>: The HTTP status line; for example, "HTTP/1.1 200 OK" (note: this is followed by a newline). In case of an error, a description will be provided in this line.
  52. -- <code>status</code>: The HTTP status value; for example, "200". If an error occurs during a request, then this value is going to be nil.
  53. -- <code>header</code>: A table of header values, where the keys are lowercase and the values are exactly what the server sent
  54. -- <code>rawheader</code>: A list of header values as "name: value" strings, in the exact format and order that the server sent them
  55. -- <code>cookies</code>: A list of cookies that the server is sending. Each cookie is a table containing the keys <code>name</code>, <code>value</code>, and <code>path</code>. This table can be sent back to the server in subsequent requests by placing it in the <code>options</code> table of any function (see below).
  56. -- <code>body</code>: The body of the response
  57. --
  58. -- Many of the functions optionally allow an 'options' table. This table can alter the HTTP headers
  59. -- or other values like the timeout. The following are valid values in 'options' (note: not all
  60. -- options will necessarily affect every function):
  61. -- * <code>timeout</code>: A timeout used for socket operations.
  62. -- * <code>header</code>: A table containing additional headers to be used for the request. For example, <code>options['header']['Content-Type'] = 'text/xml'</code>
  63. -- * <code>content</code>: The content of the message (content-length will be added -- set header['Content-Length'] to override). This can be either a string, which will be directly added as the body of the message, or a table, which will have each key=value pair added (like a normal POST request).
  64. -- * <code>cookies</code>: A list of cookies as either a string, which will be directly sent, or a table. If it's a table, the following fields are recognized:
  65. -- ** <code>name</code>
  66. -- ** <code>value</code>
  67. -- ** <code>path</code>
  68. -- ** <code>expires</code>
  69. -- Only <code>name</code> and <code>value</code> fields are required.
  70. -- * <code>auth</code>: A table containing the keys <code>username</code> and <code>password</code>, which will be used for HTTP Basic authentication.
  71. -- If a server requires HTTP Digest authentication, then there must also be a key <code>digest</code>, with value <code>true</code>.
  72. -- * <code>bypass_cache</code>: Do not perform a lookup in the local HTTP cache.
  73. -- * <code>no_cache</code>: Do not save the result of this request to the local HTTP cache.
  74. -- * <code>no_cache_body</code>: Do not save the body of the response to the local HTTP cache.
  75. -- * <code>redirect_ok</code>: Closure that overrides the default redirect_ok used to validate whether to follow HTTP redirects or not. False, if no HTTP redirects should be followed.
  76. -- The following example shows how to write a custom closure that follows 5 consecutive redirects:
  77. -- <code>
  78. -- redirect_ok = function(host,port)
  79. -- local c = 5
  80. -- return function(url)
  81. -- if ( c==0 ) then return false end
  82. -- c = c - 1
  83. -- return true
  84. -- end
  85. -- end
  86. -- </code>
  87. --
  88. -- @args http.max-cache-size The maximum memory size (in bytes) of the cache.
  89. --
  90. -- @args http.useragent The value of the User-Agent header field sent with
  91. -- requests. By default it is
  92. -- <code>"Mozilla/5.0 (compatible; Nmap Scripting Engine; http://nmap.org/book/nse.html)"</code>.
  93. -- A value of the empty string disables sending the User-Agent header field.
  94. --
  95. -- @args http.pipeline If set, it represents the number of HTTP requests that'll be
  96. -- sent on one connection. This can be set low to make debugging easier, or it
  97. -- can be set high to test how a server reacts (its chosen max is ignored).
  98. -- @args http.max-pipeline If set, it represents the number of outstanding HTTP requests
  99. -- that should be pipelined. Defaults to <code>http.pipeline</code> (if set), or to what
  100. -- <code>getPipelineMax</code> function returns.
  101. --
  102. -- TODO
  103. -- Implement cache system for http pipelines
  104. --
  105. local base64 = require "base64"
  106. local comm = require "comm"
  107. local coroutine = require "coroutine"
  108. local nmap = require "nmap"
  109. local os = require "os"
  110. local sasl = require "sasl"
  111. local stdnse = require "stdnse"
  112. local string = require "string"
  113. local table = require "table"
  114. local url = require "url"
  115. _ENV = stdnse.module("http", stdnse.seeall)
  116. ---Use ssl if we have it
  117. local have_ssl, openssl = pcall(require,'openssl')
  118. USER_AGENT = stdnse.get_script_args('http.useragent') or "Mozilla/5.0 (compatible; Nmap Scripting Engine; http://nmap.org/book/nse.html)"
  119. local MAX_REDIRECT_COUNT = 5
  120. -- Recursively copy a table.
  121. -- Only recurs when a value is a table, other values are copied by assignment.
  122. local function tcopy (t)
  123. local tc = {};
  124. for k,v in pairs(t) do
  125. if type(v) == "table" then
  126. tc[k] = tcopy(v);
  127. else
  128. tc[k] = v;
  129. end
  130. end
  131. return tc;
  132. end
  133. --- Recursively copy into a table any elements from another table whose key it
  134. -- doesn't have.
  135. local function table_augment(to, from)
  136. for k, v in pairs(from) do
  137. if type( to[k] ) == 'table' then
  138. table_augment(to[k], from[k])
  139. else
  140. to[k] = from[k]
  141. end
  142. end
  143. end
  144. --- Get a value suitable for the Host header field.
  145. -- See RFC 2616 sections 14.23 and 5.2.
  146. local function get_host_field(host, port)
  147. return stdnse.get_hostname(host)
  148. end
  149. -- Skip *( SP | HT ) starting at offset. See RFC 2616, section 2.2.
  150. -- @return the first index following the spaces.
  151. -- @return the spaces skipped over.
  152. local function skip_space(s, offset)
  153. local _, i, space = s:find("^([ \t]*)", offset)
  154. return i + 1, space
  155. end
  156. -- Get a token starting at offset. See RFC 2616, section 2.2.
  157. -- @return the first index following the token, or nil if no token was found.
  158. -- @return the token.
  159. local function get_token(s, offset)
  160. -- All characters except CTL and separators.
  161. local _, i, token = s:find("^([^()<>@,;:\\\"/%[%]?={} \0\001-\031\127]+)", offset)
  162. if i then
  163. return i + 1, token
  164. else
  165. return nil
  166. end
  167. end
  168. -- Get a quoted-string starting at offset. See RFC 2616, section 2.2. crlf is
  169. -- used as the definition for CRLF in the case of LWS within the string.
  170. -- @return the first index following the quoted-string, or nil if no
  171. -- quoted-string was found.
  172. -- @return the contents of the quoted-string, without quotes or backslash
  173. -- escapes.
  174. local function get_quoted_string(s, offset, crlf)
  175. local result = {}
  176. local i = offset
  177. assert(s:sub(i, i) == "\"")
  178. i = i + 1
  179. while i <= s:len() do
  180. local c = s:sub(i, i)
  181. if c == "\"" then
  182. -- Found the closing quote, done.
  183. return i + 1, table.concat(result)
  184. elseif c == "\\" then
  185. -- This is a quoted-pair ("\" CHAR).
  186. i = i + 1
  187. c = s:sub(i, i)
  188. if c == "" then
  189. -- No character following.
  190. error(string.format("\\ escape at end of input while parsing quoted-string."))
  191. end
  192. -- Only CHAR may follow a backslash.
  193. if c:byte(1) > 127 then
  194. error(string.format("Unexpected character with value > 127 (0x%02X) in quoted-string.", c:byte(1)))
  195. end
  196. else
  197. -- This is qdtext, which is TEXT except for '"'.
  198. -- TEXT is "any OCTET except CTLs, but including LWS," however "a CRLF is
  199. -- allowed in the definition of TEXT only as part of a header field
  200. -- continuation." So there are really two definitions of quoted-string,
  201. -- depending on whether it's in a header field or not. This function does
  202. -- not allow CRLF.
  203. c = s:sub(i, i)
  204. if c ~= "\t" and c:match("^[\0\001-\031\127]$") then
  205. error(string.format("Unexpected control character in quoted-string: 0x%02X.", c:byte(1)))
  206. end
  207. end
  208. result[#result + 1] = c
  209. i = i + 1
  210. end
  211. return nil
  212. end
  213. -- Get a ( token | quoted-string ) starting at offset.
  214. -- @return the first index following the token or quoted-string, or nil if
  215. -- nothing was found.
  216. -- @return the token or quoted-string.
  217. local function get_token_or_quoted_string(s, offset, crlf)
  218. if s:sub(offset, offset) == "\"" then
  219. return get_quoted_string(s, offset)
  220. else
  221. return get_token(s, offset)
  222. end
  223. end
  224. -- Returns the index just past the end of LWS.
  225. local function skip_lws(s, pos)
  226. local _, e
  227. while true do
  228. while string.match(s, "^[ \t]", pos) do
  229. pos = pos + 1
  230. end
  231. _, e = string.find(s, "^\r?\n[ \t]", pos)
  232. if not e then
  233. return pos
  234. end
  235. pos = e + 1
  236. end
  237. end
  238. ---Validate an 'options' table, which is passed to a number of the HTTP functions. It is
  239. -- often difficult to track down a mistake in the options table, and requires fiddling
  240. -- with the http.lua source, but this should make that a lot easier.
  241. local function validate_options(options)
  242. local bad = false
  243. if(options == nil) then
  244. return true
  245. end
  246. for key, value in pairs(options) do
  247. if(key == 'timeout') then
  248. if(type(tonumber(value)) ~= 'number') then
  249. stdnse.print_debug(1, 'http: options.timeout contains a non-numeric value')
  250. bad = true
  251. end
  252. elseif(key == 'header') then
  253. if(type(value) ~= 'table') then
  254. stdnse.print_debug(1, "http: options.header should be a table")
  255. bad = true
  256. end
  257. elseif(key == 'content') then
  258. if(type(value) ~= 'string' and type(value) ~= 'table') then
  259. stdnse.print_debug(1, "http: options.content should be a string or a table")
  260. bad = true
  261. end
  262. elseif(key == 'cookies') then
  263. if(type(value) == 'table') then
  264. for _, cookie in ipairs(value) do
  265. for cookie_key, cookie_value in pairs(cookie) do
  266. if(cookie_key == 'name') then
  267. if(type(cookie_value) ~= 'string') then
  268. stdnse.print_debug(1, "http: options.cookies[i].name should be a string")
  269. bad = true
  270. end
  271. elseif(cookie_key == 'value') then
  272. if(type(cookie_value) ~= 'string') then
  273. stdnse.print_debug(1, "http: options.cookies[i].value should be a string")
  274. bad = true
  275. end
  276. elseif(cookie_key == 'path') then
  277. if(type(cookie_value) ~= 'string') then
  278. stdnse.print_debug(1, "http: options.cookies[i].path should be a string")
  279. bad = true
  280. end
  281. elseif(cookie_key == 'expires') then
  282. if(type(cookie_value) ~= 'string') then
  283. stdnse.print_debug(1, "http: options.cookies[i].expires should be a string")
  284. bad = true
  285. end
  286. else
  287. stdnse.print_debug(1, "http: Unknown field in cookie table: %s", cookie_key)
  288. bad = true
  289. end
  290. end
  291. end
  292. elseif(type(value) ~= 'string') then
  293. stdnse.print_debug(1, "http: options.cookies should be a table or a string")
  294. bad = true
  295. end
  296. elseif(key == 'auth') then
  297. if(type(value) == 'table') then
  298. if(value['username'] == nil or value['password'] == nil) then
  299. stdnse.print_debug(1, "http: options.auth should contain both a 'username' and a 'password' key")
  300. bad = true
  301. end
  302. else
  303. stdnse.print_debug(1, "http: options.auth should be a table")
  304. bad = true
  305. end
  306. elseif (key == 'digestauth') then
  307. if(type(value) == 'table') then
  308. local req_keys = {"username","realm","nonce","digest-uri","response"}
  309. for _,k in ipairs(req_keys) do
  310. if not value[k] then
  311. stdnse.print_debug(1, "http: options.digestauth missing key: %s",k)
  312. bad = true
  313. break
  314. end
  315. end
  316. else
  317. bad = true
  318. stdnse.print_debug(1, "http: options.digestauth should be a table")
  319. end
  320. elseif(key == 'bypass_cache' or key == 'no_cache' or key == 'no_cache_body') then
  321. if(type(value) ~= 'boolean') then
  322. stdnse.print_debug(1, "http: options.bypass_cache, options.no_cache, and options.no_cache_body must be boolean values")
  323. bad = true
  324. end
  325. elseif(key == 'redirect_ok') then
  326. if(type(value)~= 'function' and type(value)~='boolean') then
  327. stdnse.print_debug(1, "http: options.redirect_ok must be a function or boolean")
  328. bad = true
  329. end
  330. else
  331. stdnse.print_debug(1, "http: Unknown key in the options table: %s", key)
  332. end
  333. end
  334. return not(bad)
  335. end
  336. -- The following recv functions, and the function <code>next_response</code>
  337. -- follow a common pattern. They each take a <code>partial</code> argument
  338. -- whose value is data that has been read from the socket but not yet used in
  339. -- parsing, and they return as their second return value a new value for
  340. -- <code>partial</code>. The idea is that, for example, in reading from the
  341. -- socket to get the Status-Line, you will probably read too much and read part
  342. -- of the header. That part (the "partial") has to be retained when you go to
  343. -- parse the header. The common use pattern is this:
  344. -- <code>
  345. -- local partial
  346. -- status_line, partial = recv_line(socket, partial)
  347. -- ...
  348. -- header, partial = recv_header(socket, partial)
  349. -- ...
  350. -- </code>
  351. -- On error, the functions return <code>nil</code> and the second return value
  352. -- is an error message.
  353. -- Receive a single line (up to <code>\n</code>).
  354. local function recv_line(s, partial)
  355. local _, e
  356. local status, data
  357. local pos
  358. partial = partial or ""
  359. pos = 1
  360. while true do
  361. _, e = string.find(partial, "\n", pos, true)
  362. if e then
  363. break
  364. end
  365. status, data = s:receive()
  366. if not status then
  367. return status, data
  368. end
  369. pos = #partial
  370. partial = partial .. data
  371. end
  372. return string.sub(partial, 1, e), string.sub(partial, e + 1)
  373. end
  374. local function line_is_empty(line)
  375. return line == "\r\n" or line == "\n"
  376. end
  377. -- Receive up to and including the first blank line, but return everything up
  378. -- to and not including the final blank line.
  379. local function recv_header(s, partial)
  380. local lines = {}
  381. partial = partial or ""
  382. while true do
  383. local line
  384. line, partial = recv_line(s, partial)
  385. if not line then
  386. return line, partial
  387. end
  388. if line_is_empty(line) then
  389. break
  390. end
  391. lines[#lines + 1] = line
  392. end
  393. return table.concat(lines), partial
  394. end
  395. -- Receive until the connection is closed.
  396. local function recv_all(s, partial)
  397. local parts
  398. partial = partial or ""
  399. parts = {partial}
  400. while true do
  401. local status, part = s:receive()
  402. if not status then
  403. break
  404. else
  405. parts[#parts + 1] = part
  406. end
  407. end
  408. return table.concat(parts), ""
  409. end
  410. -- Receive exactly <code>length</code> bytes. Returns <code>nil</code> if that
  411. -- many aren't available.
  412. local function recv_length(s, length, partial)
  413. local parts, last
  414. partial = partial or ""
  415. parts = {}
  416. last = partial
  417. length = length - #last
  418. while length > 0 do
  419. local status
  420. parts[#parts + 1] = last
  421. status, last = s:receive()
  422. if not status then
  423. return nil
  424. end
  425. length = length - #last
  426. end
  427. -- At this point length is 0 or negative, and indicates the degree to which
  428. -- the last read "overshot" the desired length.
  429. if length == 0 then
  430. return table.concat(parts) .. last, ""
  431. else
  432. return table.concat(parts) .. string.sub(last, 1, length - 1), string.sub(last, length)
  433. end
  434. end
  435. -- Receive until the end of a chunked message body, and return the dechunked
  436. -- body.
  437. local function recv_chunked(s, partial)
  438. local chunks, chunk
  439. local chunk_size
  440. local pos
  441. chunks = {}
  442. repeat
  443. local line, hex, _, i
  444. line, partial = recv_line(s, partial)
  445. if not line then
  446. return nil, partial
  447. end
  448. pos = 1
  449. pos = skip_space(line, pos)
  450. -- Get the chunk-size.
  451. _, i, hex = string.find(line, "^([%x]+)", pos)
  452. if not i then
  453. return nil, string.format("Chunked encoding didn't find hex; got %q.", string.sub(line, pos, pos + 10))
  454. end
  455. pos = i + 1
  456. chunk_size = tonumber(hex, 16)
  457. if not chunk_size or chunk_size < 0 then
  458. return nil, string.format("Chunk size %s is not a positive integer.", hex)
  459. end
  460. -- Ignore chunk-extensions that may follow here.
  461. -- RFC 2616, section 2.1 ("Implied *LWS") seems to allow *LWS between the
  462. -- parts of a chunk-extension, but that is ambiguous. Consider this case:
  463. -- "1234;a\r\n =1\r\n...". It could be an extension with a chunk-ext-name
  464. -- of "a" (and no value), and a chunk-data beginning with " =", or it could
  465. -- be a chunk-ext-name of "a" with a value of "1", and a chunk-data
  466. -- starting with "...". We don't allow *LWS here, only ( SP | HT ), so the
  467. -- first interpretation will prevail.
  468. chunk, partial = recv_length(s, chunk_size, partial)
  469. if not chunk then
  470. return nil, partial
  471. end
  472. chunks[#chunks + 1] = chunk
  473. line, partial = recv_line(s, partial)
  474. if not line then
  475. -- this warning message was initially an error but was adapted
  476. -- to support broken servers, such as the Citrix XML Service
  477. stdnse.print_debug(2, "Didn't find CRLF after chunk-data.")
  478. elseif not string.match(line, "^\r?\n") then
  479. return nil, string.format("Didn't find CRLF after chunk-data; got %q.", line)
  480. end
  481. until chunk_size == 0
  482. return table.concat(chunks), partial
  483. end
  484. -- Receive a message body, assuming that the header has already been read by
  485. -- <code>recv_header</code>. The handling is sensitive to the request method
  486. -- and the status code of the response.
  487. local function recv_body(s, response, method, partial)
  488. local connection_close, connection_keepalive
  489. local version_major, version_minor
  490. local transfer_encoding
  491. local content_length
  492. local err
  493. partial = partial or ""
  494. -- First check for Connection: close and Connection: keep-alive. This is
  495. -- necessary to handle some servers that don't follow the protocol.
  496. connection_close = false
  497. connection_keepalive = false
  498. if response.header.connection then
  499. local offset, token
  500. offset = 0
  501. while true do
  502. offset, token = get_token(response.header.connection, offset + 1)
  503. if not offset then
  504. break
  505. end
  506. if string.lower(token) == "close" then
  507. connection_close = true
  508. elseif string.lower(token) == "keep-alive" then
  509. connection_keepalive = true
  510. end
  511. end
  512. end
  513. -- The HTTP version may also affect our decisions.
  514. version_major, version_minor = string.match(response["status-line"], "^HTTP/(%d+)%.(%d+)")
  515. -- See RFC 2616, section 4.4 "Message Length".
  516. -- 1. Any response message which "MUST NOT" include a message-body (such as
  517. -- the 1xx, 204, and 304 responses and any response to a HEAD request) is
  518. -- always terminated by the first empty line after the header fields...
  519. --
  520. -- Despite the above, some servers return a body with response to a HEAD
  521. -- request. So if an HTTP/1.0 server returns a response without Connection:
  522. -- keep-alive, or any server returns a response with Connection: close, read
  523. -- whatever's left on the socket (should be zero bytes).
  524. if string.upper(method) == "HEAD"
  525. or (response.status >= 100 and response.status <= 199)
  526. or response.status == 204 or response.status == 304 then
  527. if connection_close or (version_major == "1" and version_minor == "0" and not connection_keepalive) then
  528. return recv_all(s, partial)
  529. else
  530. return "", partial
  531. end
  532. end
  533. -- 2. If a Transfer-Encoding header field (section 14.41) is present and has
  534. -- any value other than "identity", then the transfer-length is defined by
  535. -- use of the "chunked" transfer-coding (section 3.6), unless the message
  536. -- is terminated by closing the connection.
  537. if response.header["transfer-encoding"]
  538. and response.header["transfer-encoding"] ~= "identity" then
  539. return recv_chunked(s, partial)
  540. end
  541. -- The Citrix XML Service sends a wrong "Transfer-Coding" instead of
  542. -- "Transfer-Encoding".
  543. if response.header["transfer-coding"]
  544. and response.header["transfer-coding"] ~= "identity" then
  545. return recv_chunked(s, partial)
  546. end
  547. -- 3. If a Content-Length header field (section 14.13) is present, its decimal
  548. -- value in OCTETs represents both the entity-length and the
  549. -- transfer-length. The Content-Length header field MUST NOT be sent if
  550. -- these two lengths are different (i.e., if a Transfer-Encoding header
  551. -- field is present). If a message is received with both a
  552. -- Transfer-Encoding header field and a Content-Length header field, the
  553. -- latter MUST be ignored.
  554. if response.header["content-length"] and not response.header["transfer-encoding"] then
  555. content_length = tonumber(response.header["content-length"])
  556. if not content_length then
  557. return nil, string.format("Content-Length %q is non-numeric", response.header["content-length"])
  558. end
  559. return recv_length(s, content_length, partial)
  560. end
  561. -- 4. If the message uses the media type "multipart/byteranges", and the
  562. -- ransfer-length is not otherwise specified, then this self- elimiting
  563. -- media type defines the transfer-length. [sic]
  564. -- Case 4 is unhandled.
  565. -- 5. By the server closing the connection.
  566. return recv_all(s, partial)
  567. end
  568. -- Sets response["status-line"] and response.status.
  569. local function parse_status_line(status_line, response)
  570. local version, status, reason_phrase
  571. response["status-line"] = status_line
  572. version, status, reason_phrase = string.match(status_line,
  573. "^HTTP/(%d%.%d) *(%d+) *(.*)\r?\n$")
  574. if not version then
  575. return nil, string.format("Error parsing status-line %q.", status_line)
  576. end
  577. -- We don't have a use for the version; ignore it.
  578. response.status = tonumber(status)
  579. if not response.status then
  580. return nil, string.format("Status code is not numeric: %s", status)
  581. end
  582. return true
  583. end
  584. -- Sets response.header and response.rawheader.
  585. local function parse_header(header, response)
  586. local pos
  587. local name, words
  588. local s, e
  589. response.header = {}
  590. response.rawheader = stdnse.strsplit("\r?\n", header)
  591. pos = 1
  592. while pos <= #header do
  593. -- Get the field name.
  594. e, name = get_token(header, pos)
  595. if not name or e > #header or string.sub(header, e, e) ~= ":" then
  596. return nil, string.format("Can't get header field name at %q", string.sub(header, pos, pos + 30))
  597. end
  598. pos = e + 1
  599. -- Skip initial space.
  600. pos = skip_lws(header, pos)
  601. -- Get non-space words separated by LWS, then join them with a single space.
  602. words = {}
  603. while pos <= #header and not string.match(header, "^\r?\n", pos) do
  604. s = pos
  605. while not string.match(header, "^[ \t]", pos) and
  606. not string.match(header, "^\r?\n", pos) do
  607. pos = pos + 1
  608. end
  609. words[#words + 1] = string.sub(header, s, pos - 1)
  610. pos = skip_lws(header, pos)
  611. end
  612. -- Set it in our table.
  613. name = string.lower(name)
  614. if response.header[name] then
  615. response.header[name] = response.header[name] .. ", " .. table.concat(words, " ")
  616. else
  617. response.header[name] = table.concat(words, " ")
  618. end
  619. -- Next field, or end of string. (If not it's an error.)
  620. s, e = string.find(header, "^\r?\n", pos)
  621. if not e then
  622. return nil, string.format("Header field named %q didn't end with CRLF", name)
  623. end
  624. pos = e + 1
  625. end
  626. return true
  627. end
  628. -- Parse the contents of a Set-Cookie header field. The result is an array
  629. -- containing tables of the form
  630. --
  631. -- { name = "NAME", value = "VALUE", Comment = "...", Domain = "...", ... }
  632. --
  633. -- Every key except "name" and "value" is optional.
  634. --
  635. -- This function attempts to support the cookie syntax defined in RFC 2109
  636. -- along with the backwards-compatibility suggestions from its section 10,
  637. -- "HISTORICAL". Values need not be quoted, but if they start with a quote they
  638. -- will be interpreted as a quoted string.
  639. local function parse_set_cookie(s)
  640. local cookies
  641. local name, value
  642. local _, pos
  643. cookies = {}
  644. pos = 1
  645. while true do
  646. local cookie = {}
  647. -- Get the NAME=VALUE part.
  648. pos = skip_space(s, pos)
  649. pos, cookie.name = get_token(s, pos)
  650. if not cookie.name then
  651. return nil, "Can't get cookie name."
  652. end
  653. pos = skip_space(s, pos)
  654. if pos > #s or string.sub(s, pos, pos) ~= "=" then
  655. return nil, string.format("Expected '=' after cookie name \"%s\".", cookie.name)
  656. end
  657. pos = pos + 1
  658. pos = skip_space(s, pos)
  659. if string.sub(s, pos, pos) == "\"" then
  660. pos, cookie.value = get_quoted_string(s, pos)
  661. else
  662. _, pos, cookie.value = string.find(s, "([^;]*)[ \t]*", pos)
  663. pos = pos + 1
  664. end
  665. if not cookie.value then
  666. return nil, string.format("Can't get value of cookie named \"%s\".", cookie.name)
  667. end
  668. pos = skip_space(s, pos)
  669. -- Loop over the attributes.
  670. while pos <= #s and string.sub(s, pos, pos) == ";" do
  671. pos = pos + 1
  672. pos = skip_space(s, pos)
  673. pos, name = get_token(s, pos)
  674. if not name then
  675. return nil, string.format("Can't get attribute name of cookie \"%s\".", cookie.name)
  676. end
  677. pos = skip_space(s, pos)
  678. if pos <= #s and string.sub(s, pos, pos) == "=" then
  679. pos = pos + 1
  680. pos = skip_space(s, pos)
  681. if string.sub(s, pos, pos) == "\"" then
  682. pos, value = get_quoted_string(s, pos)
  683. else
  684. -- account for the possibility of the expires attribute being empty or improperly formatted
  685. local last_pos = pos
  686. if string.lower(name) == "expires" then
  687. -- For version 0 cookies we must allow one comma for "expires".
  688. _, pos, value = string.find(s, "([^,]*,[^;,]*)[ \t]*", pos)
  689. else
  690. _, pos, value = string.find(s, "([^;,]*)[ \t]*", pos)
  691. end
  692. -- account for the possibility of the expires attribute being empty or improperly formatted
  693. if ( not(pos) ) then
  694. _, pos, value = s:find("([^;]*)", last_pos)
  695. end
  696. pos = pos + 1
  697. end
  698. if not value then
  699. return nil, string.format("Can't get value of cookie attribute \"%s\".", name)
  700. end
  701. else
  702. value = true
  703. end
  704. cookie[name:lower()] = value
  705. pos = skip_space(s, pos)
  706. end
  707. cookies[#cookies + 1] = cookie
  708. if pos > #s then
  709. break
  710. end
  711. if string.sub(s, pos, pos) ~= "," then
  712. return nil, string.format("Syntax error after cookie named \"%s\".", cookie.name)
  713. end
  714. pos = pos + 1
  715. pos = skip_space(s, pos)
  716. end
  717. return cookies
  718. end
  719. -- Read one response from the socket <code>s</code> and return it after
  720. -- parsing.
  721. local function next_response(s, method, partial)
  722. local response
  723. local status_line, header, body
  724. local status, err
  725. partial = partial or ""
  726. response = {
  727. status=nil,
  728. ["status-line"]=nil,
  729. header={},
  730. rawheader={},
  731. body=""
  732. }
  733. status_line, partial = recv_line(s, partial)
  734. if not status_line then
  735. return nil, partial
  736. end
  737. status, err = parse_status_line(status_line, response)
  738. if not status then
  739. return nil, err
  740. end
  741. header, partial = recv_header(s, partial)
  742. if not header then
  743. return nil, partial
  744. end
  745. status, err = parse_header(header, response)
  746. if not status then
  747. return nil, err
  748. end
  749. body, partial = recv_body(s, response, method, partial)
  750. if not body then
  751. return nil, partial
  752. end
  753. response.body = body
  754. -- We have the Status-Line, header, and body; now do any postprocessing.
  755. response.cookies = {}
  756. if response.header["set-cookie"] then
  757. response.cookies, err = parse_set_cookie(response.header["set-cookie"])
  758. if not response.cookies then
  759. -- Ignore a cookie parsing error.
  760. response.cookies = {}
  761. end
  762. end
  763. return response, partial
  764. end
  765. --- Tries to extract the max number of requests that should be made on
  766. -- a keep-alive connection based on "Keep-Alive: timeout=xx,max=yy" response
  767. -- header.
  768. --
  769. -- If the value is not available, an arbitrary value is used. If the connection
  770. -- is not explicitly closed by the server, this same value is attempted.
  771. --
  772. -- @param response The http response - Might be a table or a raw response
  773. -- @return The max number of requests on a keep-alive connection
  774. local function getPipelineMax(response)
  775. -- Allow users to override this with a script-arg
  776. local pipeline = stdnse.get_script_args({'http.pipeline', 'pipeline'})
  777. if(pipeline) then
  778. return tonumber(pipeline)
  779. end
  780. if response then
  781. if response.header and response.header.connection ~= "close" then
  782. if response.header["keep-alive"] then
  783. local max = string.match( response.header["keep-alive"], "max=(%d*)")
  784. if(max == nil) then
  785. return 40
  786. end
  787. return tonumber(max)
  788. else
  789. return 40
  790. end
  791. end
  792. end
  793. return 1
  794. end
  795. --- Builds a string to be added to the request mod_options table
  796. --
  797. -- @param cookies A cookie jar just like the table returned parse_set_cookie.
  798. -- @param path If the argument exists, only cookies with this path are included to the request
  799. -- @return A string to be added to the mod_options table
  800. local function buildCookies(cookies, path)
  801. local cookie = ""
  802. if type(cookies) == 'string' then return cookies end
  803. for _, ck in ipairs(cookies or {}) do
  804. local ckpath = ck["path"]
  805. if not path or not ckpath
  806. or ckpath == path
  807. or ckpath:sub(-1) == "/" and ckpath == path:sub(1, ckpath:len())
  808. or ckpath .. "/" == path:sub(1, ckpath:len()+1)
  809. then
  810. cookie = cookie .. ck["name"] .. "=" .. ck["value"] .. "; "
  811. end
  812. end
  813. return cookie:gsub("; $","")
  814. end
  815. -- HTTP cache.
  816. -- Cache of GET and HEAD requests. Uses <"host:port:path", record>.
  817. -- record is in the format:
  818. -- result: The result from http.get or http.head
  819. -- last_used: The time the record was last accessed or made.
  820. -- get: Was the result received from a request to get or recently wiped?
  821. -- size: The size of the record, equal to #record.result.body.
  822. local cache = {size = 0};
  823. local function check_size (cache)
  824. local max_size = tonumber(stdnse.get_script_args({'http.max-cache-size', 'http-max-cache-size'}) or 1e6);
  825. local size = cache.size;
  826. if size > max_size then
  827. stdnse.print_debug(1,
  828. "Current http cache size (%d bytes) exceeds max size of %d",
  829. size, max_size);
  830. table.sort(cache, function(r1, r2)
  831. return (r1.last_used or 0) < (r2.last_used or 0);
  832. end);
  833. for i, record in ipairs(cache) do
  834. if size <= max_size then break end
  835. local result = record.result;
  836. if type(result.body) == "string" then
  837. size = size - record.size;
  838. record.size, record.get, result.body = 0, false, "";
  839. end
  840. end
  841. cache.size = size;
  842. end
  843. stdnse.print_debug(2, "Final http cache size (%d bytes) of max size of %d",
  844. size, max_size);
  845. return size;
  846. end
  847. -- Unique value to signal value is being retrieved.
  848. -- Also holds <mutex, thread> pairs, working thread is value
  849. local WORKING = setmetatable({}, {__mode = "v"});
  850. local function lookup_cache (method, host, port, path, options)
  851. if(not(validate_options(options))) then
  852. return nil
  853. end
  854. options = options or {};
  855. local bypass_cache = options.bypass_cache; -- do not lookup
  856. local no_cache = options.no_cache; -- do not save result
  857. local no_cache_body = options.no_cache_body; -- do not save body
  858. if type(port) == "table" then port = port.number end
  859. local key = stdnse.get_hostname(host)..":"..port..":"..path;
  860. local mutex = nmap.mutex(tostring(lookup_cache)..key);
  861. local state = {
  862. mutex = mutex,
  863. key = key,
  864. method = method,
  865. bypass_cache = bypass_cache,
  866. no_cache = no_cache,
  867. no_cache_body = no_cache_body,
  868. };
  869. while true do
  870. mutex "lock";
  871. local record = cache[key];
  872. if bypass_cache or record == nil or method ~= record.method then
  873. WORKING[mutex] = coroutine.running();
  874. cache[key], state.old_record = WORKING, record;
  875. return nil, state;
  876. elseif record == WORKING then
  877. local working = WORKING[mutex];
  878. if working == nil or coroutine.status(working) == "dead" then
  879. -- thread died before insert_cache could be called
  880. cache[key] = nil; -- reset
  881. end
  882. mutex "done";
  883. else
  884. mutex "done";
  885. record.last_used = os.time();
  886. return tcopy(record.result), state;
  887. end
  888. end
  889. end
  890. local function response_is_cacheable(response)
  891. -- if response.status is nil, then an error must have occured during the request
  892. -- and we probably don't want to cache the response
  893. if not response.status then
  894. return false
  895. end
  896. -- 206 Partial Content. RFC 2616, 1.34: "...a cache that does not support the
  897. -- Range and Content-Range headers MUST NOT cache 206 (Partial Content)
  898. -- responses."
  899. if response.status == 206 then
  900. return false
  901. end
  902. -- RFC 2616, 13.4. "A response received with any [status code other than 200,
  903. -- 203, 206, 300, 301 or 410] (e.g. status codes 302 and 307) MUST NOT be
  904. -- returned in a reply to a subsequent request unless there are cache-control
  905. -- directives or another header(s) that explicitly allow it."
  906. -- We violate the standard here and allow these other codes to be cached,
  907. -- with the exceptions listed below.
  908. -- 401 Unauthorized. Caching this would prevent us from retrieving it later
  909. -- with the correct credentials.
  910. if response.status == 401 then
  911. return false
  912. end
  913. return true
  914. end
  915. local function insert_cache (state, response)
  916. local key = assert(state.key);
  917. local mutex = assert(state.mutex);
  918. if response == nil or state.no_cache or not response_is_cacheable(response) then
  919. cache[key] = state.old_record;
  920. else
  921. local record = {
  922. result = tcopy(response),
  923. last_used = os.time(),
  924. method = state.method,
  925. size = type(response.body) == "string" and #response.body or 0,
  926. };
  927. response = record.result; -- only modify copy
  928. cache[key], cache[#cache+1] = record, record;
  929. if state.no_cache_body then
  930. response.body = "";
  931. end
  932. if type(response.body) == "string" then
  933. cache.size = cache.size + #response.body;
  934. check_size(cache);
  935. end
  936. end
  937. mutex "done";
  938. end
  939. -- Return true if the given method requires a body in the request. In case no
  940. -- body was supplied we must send "Content-Length: 0".
  941. local function request_method_needs_content_length(method)
  942. return method == "POST"
  943. end
  944. -- For each of the following request functions, <code>host</code> may either be
  945. -- a string or a table, and <code>port</code> may either be a number or a
  946. -- table.
  947. --
  948. -- The format of the return value is a table with the following structure:
  949. -- {status = 200, status-line = "HTTP/1.1 200 OK", header = {}, rawheader = {}, body ="<html>...</html>"}
  950. -- The header table has an entry for each received header with the header name
  951. -- being the key. The table also has an entry named "status" which contains the
  952. -- http status code of the request.
  953. -- In case of an error, the status is nil and status-line describes the problem.
  954. local function http_error(status_line)
  955. return {
  956. status = nil,
  957. ["status-line"] = status_line,
  958. header = {},
  959. rawheader = {},
  960. body = nil,
  961. }
  962. end
  963. --- Build an HTTP request from parameters and return it as a string.
  964. --
  965. -- @param host The host this request is intended for.
  966. -- @param port The port this request is intended for.
  967. -- @param method The method to use.
  968. -- @param path The path for the request.
  969. -- @param options A table of options, which may include the keys:
  970. -- * <code>header</code>: A table containing additional headers to be used for the request.
  971. -- * <code>content</code>: The content of the message (content-length will be added -- set header['Content-Length'] to override)
  972. -- * <code>cookies</code>: A table of cookies in the form returned by <code>parse_set_cookie</code>.
  973. -- * <code>auth</code>: A table containing the keys <code>username</code> and <code>password</code>.
  974. -- @return A request string.
  975. -- @see generic_request
  976. local function build_request(host, port, method, path, options)
  977. if(not(validate_options(options))) then
  978. return nil
  979. end
  980. options = options or {}
  981. -- Private copy of the options table, used to add default header fields.
  982. local mod_options = {
  983. header = {
  984. Connection = "close",
  985. Host = get_host_field(host, port),
  986. ["User-Agent"] = USER_AGENT
  987. }
  988. }
  989. if options.cookies then
  990. local cookies = buildCookies(options.cookies, path)
  991. if #cookies > 0 then
  992. mod_options.header["Cookie"] = cookies
  993. end
  994. end
  995. if options.auth and not options.auth.digest then
  996. local username = options.auth.username
  997. local password = options.auth.password
  998. local credentials = "Basic " .. base64.enc(username .. ":" .. password)
  999. mod_options.header["Authorization"] = credentials
  1000. end
  1001. if options.digestauth then
  1002. local order = {"username", "realm", "nonce", "digest-uri", "algorithm", "response", "qop", "nc", "cnonce"}
  1003. local no_quote = {algorithm=true, qop=true, nc=true}
  1004. local creds = {}
  1005. for _,k in ipairs(order) do
  1006. local v = options.digestauth[k]
  1007. if v then
  1008. if no_quote[k] then
  1009. table.insert(creds, ("%s=%s"):format(k,v))
  1010. else
  1011. if k == "digest-uri" then
  1012. table.insert(creds, ('%s="%s"'):format("uri",v))
  1013. else
  1014. table.insert(creds, ('%s="%s"'):format(k,v))
  1015. end
  1016. end
  1017. end
  1018. end
  1019. local credentials = "Digest "..table.concat(creds, ", ")
  1020. mod_options.header["Authorization"] = credentials
  1021. end
  1022. local body
  1023. -- Build a form submission from a table, like "k1=v1&k2=v2".
  1024. if type(options.content) == "table" then
  1025. local parts = {}
  1026. local k, v
  1027. for k, v in pairs(options.content) do
  1028. parts[#parts + 1] = url.escape(k) .. "=" .. url.escape(v)
  1029. end
  1030. body = table.concat(parts, "&")
  1031. mod_options.header["Content-Type"] = "application/x-www-form-urlencoded"
  1032. elseif options.content then
  1033. body = options.content
  1034. elseif request_method_needs_content_length(method) then
  1035. body = ""
  1036. end
  1037. if body then
  1038. mod_options.header["Content-Length"] = #body
  1039. end
  1040. -- Add any other header fields into the local copy.
  1041. table_augment(mod_options, options)
  1042. -- We concat this string manually to allow null bytes in requests
  1043. local request_line = method.." "..path.." HTTP/1.1"
  1044. local header = {}
  1045. for name, value in pairs(mod_options.header) do
  1046. -- we concat this string manually to allow null bytes in requests
  1047. header[#header + 1] = name..": "..value
  1048. end
  1049. return request_line .. "\r\n" .. stdnse.strjoin("\r\n", header) .. "\r\n\r\n" .. (body or "")
  1050. end
  1051. --- Send a string to a host and port and return the HTTP result. This function
  1052. -- is like <code>generic_request</code>, to be used when you have a ready-made
  1053. -- request, not a collection of request parameters.
  1054. --
  1055. -- @param host The host to connect to.
  1056. -- @param port The port to connect to.
  1057. -- @param options A table of other parameters. It may have any of these fields:
  1058. -- * <code>timeout</code>: A timeout used for socket operations.
  1059. -- * <code>header</code>: A table containing additional headers to be used for the request.
  1060. -- * <code>content</code>: The content of the message (content-length will be added -- set header['Content-Length'] to override)
  1061. -- * <code>cookies</code>: A table of cookies in the form returned by <code>parse_set_cookie</code>.
  1062. -- * <code>auth</code>: A table containing the keys <code>username</code> and <code>password</code>.
  1063. -- @return A response table, see module documentation for description.
  1064. -- @see generic_request
  1065. local function request(host, port, data, options)
  1066. if(not(validate_options(options))) then
  1067. return http_error("Options failed to validate.")
  1068. end
  1069. local method
  1070. local header
  1071. local response
  1072. options = options or {}
  1073. if type(port) == 'table' then
  1074. if port.protocol and port.protocol ~= 'tcp' then
  1075. stdnse.print_debug(1, "http.request() supports the TCP protocol only, your request to %s cannot be completed.", host)
  1076. return http_error("Unsupported protocol.")
  1077. end
  1078. end
  1079. method = string.match(data, "^(%S+)")
  1080. local socket, partial, opts = comm.tryssl(host, port, data, { timeout = options.timeout })
  1081. if not socket then
  1082. return http_error("Error creating socket.")
  1083. end
  1084. repeat
  1085. response, partial = next_response(socket, method, partial)
  1086. if not response then
  1087. return http_error("There was an error in next_response function.")
  1088. end
  1089. -- See RFC 2616, sections 8.2.3 and 10.1.1, for the 100 Continue status.
  1090. -- Sometimes a server will tell us to "go ahead" with a POST body before
  1091. -- sending the real response. If we got one of those, skip over it.
  1092. until not (response.status >= 100 and response.status <= 199)
  1093. socket:close()
  1094. -- if SSL was used to retrieve the URL mark this in the response
  1095. response.ssl = ( opts == 'ssl' )
  1096. return response
  1097. end
  1098. ---Do a single request with a given method. The response is returned as the standard
  1099. -- response table (see the module documentation).
  1100. --
  1101. -- The <code>get</code>, <code>head</code>, and <code>post</code> functions are simple
  1102. -- wrappers around <code>generic_request</code>.
  1103. --
  1104. -- Any 1XX (informational) responses are discarded.
  1105. --
  1106. -- @param host The host to connect to.
  1107. -- @param port The port to connect to.
  1108. -- @param method The method to use; for example, 'GET', 'HEAD', etc.
  1109. -- @param path The path to retrieve.
  1110. -- @param options [optional] A table that lets the caller control socket timeouts, HTTP headers, and other parameters. For full documentation, see the module documentation (above).
  1111. -- @return A response table, see module documentation for description.
  1112. -- @see request
  1113. function generic_request(host, port, method, path, options)
  1114. if(not(validate_options(options))) then
  1115. return http_error("Options failed to validate.")
  1116. end
  1117. local digest_auth = options and options.auth and options.auth.digest
  1118. if digest_auth and not have_ssl then
  1119. stdnse.print_debug("http: digest auth requires openssl.")
  1120. end
  1121. if digest_auth and have_ssl then
  1122. -- If we want to do digest authentication, we have to make an initial
  1123. -- request to get realm, nonce and other fields.
  1124. local options_with_auth_removed = tcopy(options)
  1125. options_with_auth_removed["auth"] = nil
  1126. local r = generic_request(host, port, method, path, options_with_auth_removed)
  1127. local h = r.header['www-authenticate']
  1128. if not r.status or (h and not string.find(h:lower(), "digest.-realm")) then
  1129. stdnse.print_debug("http: the target doesn't support digest auth or there was an error during request.")
  1130. return http_error("The target doesn't support digest auth or there was an error during request.")
  1131. end
  1132. -- Compute the response hash
  1133. local dmd5 = sasl.DigestMD5:new(h, options.auth.username, options.auth.password, method, path)
  1134. local _, digest_table = dmd5:calcDigest()
  1135. options.digestauth = digest_table
  1136. end
  1137. return request(host, port, build_request(host, port, method, path, options), options)
  1138. end
  1139. ---Uploads a file using the PUT method and returns a result table. This is a simple wrapper
  1140. -- around <code>generic_request</code>
  1141. --
  1142. -- @param host The host to connect to.
  1143. -- @param port The port to connect to.
  1144. -- @param path The path to retrieve.
  1145. -- @param options [optional] A table that lets the caller control socket timeouts, HTTP headers, and other parameters. For full documentation, see the module documentation (above).
  1146. -- @param putdata The contents of the file to upload
  1147. -- @return A response table, see module documentation for description.
  1148. -- @see http.generic_request
  1149. function put(host, port, path, options, putdata)
  1150. if(not(validate_options(options))) then
  1151. return http_error("Options failed to validate.")
  1152. end
  1153. if ( not(putdata) ) then
  1154. return http_error("No file to PUT.")
  1155. end
  1156. local mod_options = {
  1157. content = putdata,
  1158. }
  1159. table_augment(mod_options, options or {})
  1160. return generic_request(host, port, "PUT", path, mod_options)
  1161. end
  1162. -- Check if the given URL is okay to redirect to. Return a table with keys
  1163. -- "host", "port", and "path" if okay, nil otherwise.
  1164. -- @param url table as returned by url.parse
  1165. -- @param host table as received by the action function
  1166. -- @param port table as received by the action function
  1167. -- @return loc table containing the new location
  1168. function redirect_ok(host, port)
  1169. -- A battery of tests a URL is subjected to in order to decide if it may be
  1170. -- redirected to. They incrementally fill in loc.host, loc.port, and loc.path.
  1171. local rules = {
  1172. -- Check if there's any credentials in the url
  1173. function (url, host, port)
  1174. -- bail if userinfo is present
  1175. return ( url.userinfo and false ) or true
  1176. end,
  1177. -- Check if the location is within the domain or host
  1178. function (url, host, port)
  1179. local hostname = stdnse.get_hostname(host)
  1180. if ( hostname == host.ip and host.ip == url.host.ip ) then
  1181. return true
  1182. end
  1183. local domain = hostname:match("^[^%.]-%.(.*)") or hostname
  1184. local match = ("^.*%s$"):format(domain)
  1185. if ( url.host:match(match) ) then
  1186. return true
  1187. end
  1188. return false
  1189. end,
  1190. -- Check whether the new location has the same port number
  1191. function (url, host, port)
  1192. -- port fixup, adds default ports 80 and 443 in case no url.port was
  1193. -- defined, we do this based on the url scheme
  1194. local url_port = url.port
  1195. if ( not(url_port) ) then
  1196. if ( url.scheme == "http" ) then
  1197. url_port = 80
  1198. elseif( url.scheme == "https" ) then
  1199. url_port = 443
  1200. end
  1201. end
  1202. if (not url_port) or tonumber(url_port) == port.number then
  1203. return true
  1204. end
  1205. return false
  1206. end,
  1207. -- Check whether the url.scheme matches the port.service
  1208. function (url, host, port)
  1209. -- if url.scheme is present then it must match the scanned port
  1210. if url.scheme and url.port then return true end
  1211. if url.scheme and url.scheme ~= port.service then return false end
  1212. return true
  1213. end,
  1214. -- make sure we're actually being redirected somewhere and not to the same url
  1215. function (url, host, port)
  1216. -- path cannot be unchanged unless host has changed
  1217. -- loc.path must be set if returning true
  1218. if ( not url.path or url.path == "/" ) and url.host == ( host.targetname or host.ip) then return false end
  1219. if not url.path then return true end
  1220. return true
  1221. end,
  1222. }
  1223. local counter = MAX_REDIRECT_COUNT
  1224. -- convert a numeric port to a table
  1225. if ( "number" == type(port) ) then
  1226. port = { number = port }
  1227. end
  1228. return function(url)
  1229. if ( counter == 0 ) then return false end
  1230. counter = counter - 1
  1231. for i, rule in ipairs( rules ) do
  1232. if ( not(rule( url, host, port )) ) then
  1233. --stdnse.print_debug("Rule failed: %d", i)
  1234. return false
  1235. end
  1236. end
  1237. return true
  1238. end
  1239. end
  1240. -- Handles a HTTP redirect
  1241. -- @param host table as received by the script action function
  1242. -- @param port table as received by the script action function
  1243. -- @param path string
  1244. -- @param response table as returned by http.get or http.head
  1245. -- @return url table as returned by <code>url.parse</code> or nil if there's no
  1246. -- redirect taking place
  1247. local function parse_redirect(host, port, path, response)
  1248. if ( not(tostring(response.status):match("^30[01237]$")) or
  1249. not(response.header) or
  1250. not(response.header.location) ) then
  1251. return nil
  1252. end
  1253. port = ( "number" == type(port) ) and { number = port } or port
  1254. local u = url.parse(response.header.location)
  1255. if ( not(u.host) ) then
  1256. -- we're dealing with a relative url
  1257. u.host = stdnse.get_hostname(host)
  1258. u.path = ((u.path:sub(1,1) == "/" and "" ) or "/" ) .. u.path -- ensuring leading slash
  1259. end
  1260. -- do port fixup
  1261. if ( not(u.port) ) then
  1262. if ( u.scheme == "http" ) then u.port = 80
  1263. elseif ( u.scheme == "https") then u.port = 443
  1264. else u.port = port.number end
  1265. end
  1266. if ( not(u.path) ) then
  1267. u.path = "/"
  1268. end
  1269. if ( u.query ) then
  1270. u.path = ("%s?%s"):format( u.path, u.query )
  1271. end
  1272. return u
  1273. end
  1274. -- Retrieves the correct function to use to validate HTTP redirects
  1275. -- @param host table as received by the action function
  1276. -- @param port table as received by the action function
  1277. -- @param options table as passed to http.get or http.head
  1278. -- @return redirect_ok function used to validate HTTP redirects
  1279. local function get_redirect_ok(host, port, options)
  1280. if ( options ) then
  1281. if ( options.redirect_ok == false ) then
  1282. return function() return false end
  1283. elseif( "function" == type(options.redirect_ok) ) then
  1284. return options.redirect_ok(host, port)
  1285. else
  1286. return redirect_ok(host, port)
  1287. end
  1288. else
  1289. return redirect_ok(host, port)
  1290. end
  1291. end
  1292. ---Fetches a resource with a GET request and returns the result as a table. This is a simple
  1293. -- wraper around <code>generic_request</code>, with the added benefit of having local caching
  1294. -- and support for HTTP redirects. Redirects are followed only if they pass all the
  1295. -- validation rules of the redirect_ok function. This function may be overridden by supplying
  1296. -- a custom function in the <code>redirect_ok</code> field of the options array. The default
  1297. -- function redirects the request if the destination is:
  1298. -- * Within the same host or domain
  1299. -- * Has the same port number
  1300. -- * Stays within the current scheme
  1301. -- * Does not exceed <code>MAX_REDIRECT_COUNT</code> count of redirects
  1302. --
  1303. -- Caching and redirects can be controlled in the <code>options</code> array, see module
  1304. -- documentation for more information.
  1305. --
  1306. -- @param host The host to connect to.
  1307. -- @param port The port to connect to.
  1308. -- @param path The path to retrieve.
  1309. -- @param options [optional] A table that lets the caller control socket timeouts, HTTP headers, and other parameters. For full documentation, see the module documentation (above).
  1310. -- @return A response table, see module documentation for description.
  1311. -- @see http.generic_request
  1312. function get(host, port, path, options)
  1313. if(not(validate_options(options))) then
  1314. return http_error("Options failed to validate.")
  1315. end
  1316. local redir_check = get_redirect_ok(host, port, options)
  1317. local response, state, location
  1318. local u = { host = host, port = port, path = path }
  1319. repeat
  1320. response, state = lookup_cache("GET", u.host, u.port, u.path, options);
  1321. if ( response == nil ) then
  1322. response = generic_request(u.host, u.port, "GET", u.path, options)
  1323. insert_cache(state, response);
  1324. end
  1325. u = parse_redirect(host, port, path, response)
  1326. if ( not(u) ) then
  1327. break
  1328. end
  1329. location = location or {}
  1330. table.insert(location, response.header.location)
  1331. until( not(redir_check(u)) )
  1332. response.location = location
  1333. return response
  1334. end
  1335. ---Parses a URL and calls <code>http.get</code> with the result. The URL can contain
  1336. -- all the standard fields, protocol://host:port/path
  1337. --
  1338. -- @param u The URL of the host.
  1339. -- @param options [optional] A table that lets the caller control socket timeouts, HTTP headers, and other parameters. For full documentation, see the module documentation (above).
  1340. -- @return A response table, see module documentation for description.
  1341. -- @see http.get
  1342. function get_url( u, options )
  1343. if(not(validate_options(options))) then
  1344. return http_error("Options failed to validate.")
  1345. end
  1346. local parsed = url.parse( u )
  1347. local port = {}
  1348. port.service = parsed.scheme
  1349. port.number = parsed.port
  1350. if not port.number then
  1351. if parsed.scheme == 'https' then
  1352. port.number = 443
  1353. else
  1354. port.number = 80
  1355. end
  1356. end
  1357. local path = parsed.path or "/"
  1358. if parsed.query then
  1359. path = path .. "?" .. parsed.query
  1360. end
  1361. return get( parsed.host, port, path, options )
  1362. end
  1363. ---Fetches a resource with a HEAD request. Like <code>get</code>, this is a simple
  1364. -- wrapper around <code>generic_request</code> with response caching. This function
  1365. -- also has support for HTTP redirects. Redirects are followed only if they pass
  1366. -- all the validation rules of the redirect_ok function. This function may be
  1367. -- overridden by supplying a custom function in the <code>redirect_ok</code> field
  1368. -- of the options array. The default function redirects the request if the
  1369. -- destination is:
  1370. -- * Within the same host or domain
  1371. -- * Has the same port number
  1372. -- * Stays within the current scheme
  1373. -- * Does not exceed <code>MAX_REDIRECT_COUNT</code> count of redirects
  1374. --
  1375. -- Caching and redirects can be controlled in the <code>options</code> array,
  1376. -- see module documentation for more information.
  1377. --
  1378. -- @param host The host to connect to.
  1379. -- @param port The port to connect to.
  1380. -- @param path The path to retrieve.
  1381. -- @param options [optional] A table that lets the caller control socket timeouts, HTTP headers, and other parameters. For full documentation, see the module documentation (above).
  1382. -- @return A response table, see module documentation for description.
  1383. -- @see http.generic_request
  1384. function head(host, port, path, options)
  1385. if(not(validate_options(options))) then
  1386. return http_error("Options failed to validate.")
  1387. end
  1388. local redir_check = get_redirect_ok(host, port, options)
  1389. local response, state, location
  1390. local u = { host = host, port = port, path = path }
  1391. repeat
  1392. response, state = lookup_cache("HEAD", host, port, path, options);
  1393. if response == nil then
  1394. response = generic_request(host, port, "HEAD", path, options)
  1395. insert_cache(state, response);
  1396. end
  1397. u = parse_redirect(host, port, path, response)
  1398. if ( not(u) ) then
  1399. break
  1400. end
  1401. location = location or {}
  1402. table.insert(location, response.header.location)
  1403. until( not(redir_check(u)) )
  1404. response.location = location
  1405. return response
  1406. end
  1407. ---Fetches a resource with a POST request. Like <code>get</code>, this is a simple
  1408. -- wrapper around <code>generic_request</code> except that postdata is handled
  1409. -- properly.
  1410. --
  1411. -- @param host The host to connect to.
  1412. -- @param port The port to connect to.
  1413. -- @param path The path to retrieve.
  1414. -- @param options [optional] A table that lets the caller control socket timeouts, HTTP headers, and other parameters. For full documentation, see the module documentation (above).
  1415. -- @param ignored Ignored for backwards compatibility.
  1416. -- @param postdata A string or a table of data to be posted. If a table, the keys and values must be strings, and they will be encoded into an application/x-www-form-encoded form submission.
  1417. -- @return A response table, see module documentation for description.
  1418. -- @see http.generic_request
  1419. function post( host, port, path, options, ignored, postdata )
  1420. if(not(validate_options(options))) then
  1421. return http_error("Options failed to validate.")
  1422. end
  1423. local mod_options = {
  1424. content = postdata,
  1425. }
  1426. table_augment(mod_options, options or {})
  1427. return generic_request(host, port, "POST", path, mod_options)
  1428. end
  1429. -- Deprecated pipeline functions
  1430. function pGet( host, port, path, options, ignored, allReqs )
  1431. stdnse.print_debug(1, "WARNING: pGet() is deprecated. Use pipeline_add() instead.")
  1432. return pipeline_add(path, options, allReqs, 'GET')
  1433. end
  1434. function pHead( host, port, path, options, ignored, allReqs )
  1435. stdnse.print_debug(1, "WARNING: pHead() is deprecated. Use pipeline_add instead.")
  1436. return pipeline_add(path, options, allReqs, 'HEAD')
  1437. end
  1438. function addPipeline(host, port, path, options, ignored, allReqs, method)
  1439. stdnse.print_debug(1, "WARNING: addPipeline() is deprecated! Use pipeline_add instead.")
  1440. return pipeline_add(path, options, allReqs, method)
  1441. end
  1442. function pipeline(host, port, allReqs)
  1443. stdnse.print_debug(1, "WARNING: pipeline() is deprecated. Use pipeline_go() instead.")
  1444. return pipeline_go(host, port, allReqs)
  1445. end
  1446. ---Adds a pending request to the HTTP pipeline. The HTTP pipeline is a set of requests that will
  1447. -- all be sent at the same time, or as close as the server allows. This allows more efficient
  1448. -- code, since requests are automatically buffered and sent simultaneously.
  1449. --
  1450. -- The <code>all_requests</code> argument contains the current list of queued requests (if this
  1451. -- is the first time calling <code>pipeline_add</code>, it should be <code>nil</code>). After
  1452. -- adding the request to end of the queue, the queue is returned and can be passed to the next
  1453. -- <code>pipeline_add</code> call.
  1454. --
  1455. -- When all requests have been queued, call <code>pipeline_go</code> with the all_requests table
  1456. -- that has been built.
  1457. --
  1458. -- @param path The path to retrieve.
  1459. -- @param options [optional] A table that lets the caller control socket timeouts, HTTP headers, and other parameters. For full documentation, see the module documentation (above).
  1460. -- @param all_requests [optional] The current pipeline queue (retunred from a previous <code>add_pipeline</code> call), or nil if it's the first call.
  1461. -- @param method [optional] The HTTP method ('get', 'head', 'post', etc). Default: 'get'.
  1462. -- @return Table with the pipeline get requests (plus this new one)
  1463. -- @see http.pipeline_go
  1464. function pipeline_add(path, options, all_requests, method)
  1465. if(not(validate_options(options))) then
  1466. return nil
  1467. end
  1468. method = method or 'GET'
  1469. all_requests = all_requests or {}
  1470. local mod_options = {
  1471. header = {
  1472. ["Connection"] = "keep-alive"
  1473. }
  1474. }
  1475. table_augment(mod_options, options or {})
  1476. local object = { method=method, path=path, options=mod_options }
  1477. table.insert(all_requests, object)
  1478. return all_requests
  1479. end
  1480. ---Performs all queued requests in the all_requests variable (created by the
  1481. -- <code>pipeline_add</code> function). Returns an array of responses, each of
  1482. -- which is a table as defined in the module documentation above.
  1483. --
  1484. -- @param host The host to connect to.
  1485. -- @param port The port to connect to.
  1486. -- @param all_requests A table with all the previously built pipeline requests
  1487. -- @return A list of responses, in the same order as the requests were queued. Each response is a table as described in the module documentation.
  1488. function pipeline_go(host, port, all_requests)
  1489. stdnse.print_debug("Total number of pipelined requests: " .. #all_requests)
  1490. local responses
  1491. local response
  1492. local partial
  1493. responses = {}
  1494. -- Check for an empty request
  1495. if (#all_requests == 0) then
  1496. stdnse.print_debug(1, "Warning: empty set of requests passed to http.pipeline()")
  1497. return responses
  1498. end
  1499. local socket, bopt
  1500. -- We'll try a first request with keep-alive, just to check if the server
  1501. -- supports and how many requests we can send into one socket!
  1502. local request = build_request(host, port, all_requests[1].method, all_requests[1].path, all_requests[1].options)
  1503. socket, partial, bopt = comm.tryssl(host, port, request, {connect_timeout=5000, request_timeout=3000, recv_before=false})
  1504. if not socket then
  1505. return nil
  1506. end
  1507. response, partial = next_response(socket, all_requests[1].method, partial)
  1508. if not response then
  1509. return nil
  1510. end
  1511. table.insert(responses, response)
  1512. local limit = getPipelineMax(response) -- how many requests to send on one connection
  1513. limit = limit > #all_requests and #all_requests or limit
  1514. local max_pipeline = stdnse.get_script_args("http.max-pipeline") or limit -- how many requests should be pipelined
  1515. local count = 1
  1516. stdnse.print_debug(1, "Number of requests allowed by pipeline: " .. limit)
  1517. while #responses < #all_requests do
  1518. local j, batch_end
  1519. -- we build a table with many requests, upper limited by the var "limit"
  1520. local requests = {}
  1521. if #responses + limit < #all_requests then
  1522. batch_end = #responses + limit
  1523. else
  1524. batch_end = #all_requests
  1525. end
  1526. j = #responses + 1
  1527. while j <= batch_end do
  1528. if j == batch_end then
  1529. all_requests[j].options.header["Connection"] = "close"
  1530. end
  1531. if j~= batch_end and all_requests[j].options.header["Connection"] ~= 'keep-alive' then
  1532. all_requests[j].options.header["Connection"] = 'keep-alive'
  1533. end
  1534. table.insert(requests, build_request(host, port, all_requests[j].method, all_requests[j].path, all_requests[j].options))
  1535. -- to avoid calling build_request more then one time on the same request,
  1536. -- we might want to build all the requests once, above the main while loop
  1537. j = j + 1
  1538. end
  1539. if count >= limit or not socket:get_info() then
  1540. socket:connect(host, port, bopt)
  1541. partial = ""
  1542. count = 0
  1543. end
  1544. socket:set_timeout(10000)
  1545. local start = 1
  1546. local len = #requests
  1547. local req_sent = 0
  1548. -- start sending the requests and pipeline them in batches of max_pipeline elements
  1549. while start <= len do
  1550. stdnse.print_debug(2, "HTTP pipeline: number of requests in current batch: %d, already sent: %d, responses from current batch: %d, all responses received: %d",len,start-1,count,#responses)
  1551. local req = {}
  1552. if max_pipeline == limit then
  1553. req = requests
  1554. else
  1555. for i=start,start+max_pipeline-1,1 do
  1556. table.insert(req, requests[i])
  1557. end
  1558. end
  1559. local num_req = #req
  1560. req = table.concat(req, "")
  1561. start = start + max_pipeline
  1562. socket:send(req)
  1563. req_sent = req_sent + num_req
  1564. local inner_count = 0
  1565. local fail = false
  1566. -- collect responses for the last batch
  1567. while inner_count < num_req and #responses < #all_requests do
  1568. response, partial = next_response(socket, all_requests[#responses + 1].method, partial)
  1569. if not response then
  1570. stdnse.print_debug("HTTP pipeline: there was a problem while receiving responses.")
  1571. stdnse.print_debug(3, "The request was:\n%s",req)
  1572. fail = true
  1573. break
  1574. end
  1575. count = count + 1
  1576. inner_count = inner_count + 1
  1577. responses[#responses + 1] = response
  1578. end
  1579. if fail then break end
  1580. end
  1581. socket:close()
  1582. if count == 0 then
  1583. stdnse.print_debug("Received 0 of %d expected responses.\nGiving up on pipeline.", limit);
  1584. break
  1585. elseif count < req_sent then
  1586. stdnse.print_debug("Received only %d of %d expected responses.\nDecreasing max pipelined requests to %d.", count, req_sent, count)
  1587. limit = count
  1588. end
  1589. end
  1590. stdnse.print_debug("Number of received responses: " .. #responses)
  1591. return responses
  1592. end
  1593. -- Parsing of specific headers. skip_space and the read_* functions return the
  1594. -- byte index following whatever they have just read, or nil on error.
  1595. -- Skip whitespace (that has already been folded from LWS). See RFC 2616,
  1596. -- section 2.2, definition of LWS.
  1597. local function skip_space(s, pos)
  1598. local _
  1599. _, pos = string.find(s, "^[ \t]*", pos)
  1600. return pos + 1
  1601. end
  1602. -- See RFC 2616, section 2.2.
  1603. local function read_token(s, pos)
  1604. local _, token
  1605. pos = skip_space(s, pos)
  1606. -- 1*<any CHAR except CTLs or separators>. CHAR is only byte values 0-127.
  1607. _, pos, token = string.find(s, "^([^\0\001-\031()<>@,;:\\\"/?={} \t%[%]\127-\255]+)", pos)
  1608. if token then
  1609. return pos + 1, token
  1610. else
  1611. return nil
  1612. end
  1613. end
  1614. -- See RFC 2616, section 2.2. Here we relax the restriction that TEXT may not
  1615. -- contain CTLs.
  1616. local function read_quoted_string(s, pos)
  1617. local chars = {}
  1618. if string.sub(s, pos, pos) ~= "\"" then
  1619. return nil
  1620. end
  1621. pos = pos + 1
  1622. pos = skip_space(s, pos)
  1623. while pos <= #s and string.sub(s, pos, pos) ~= "\"" do
  1624. local c
  1625. c = string.sub(s, pos, pos)
  1626. if c == "\\" then
  1627. if pos < #s then
  1628. pos = pos + 1
  1629. c = string.sub(s, pos, pos)
  1630. else
  1631. return nil
  1632. end
  1633. end
  1634. chars[#chars + 1] = c
  1635. pos = pos + 1
  1636. end
  1637. if pos > #s or string.sub(s, pos, pos) ~= "\"" then
  1638. return nil
  1639. end
  1640. return pos + 1, table.concat(chars)
  1641. end
  1642. local function read_token_or_quoted_string(s, pos)
  1643. pos = skip_space(s, pos)
  1644. if string.sub(s, pos, pos) == "\"" then
  1645. return read_quoted_string(s, pos)
  1646. else
  1647. return read_token(s, pos)
  1648. end
  1649. end
  1650. ---
  1651. -- Finds forms in html code
  1652. -- returns table of found forms, in plaintext.
  1653. -- @param body A <code>response.body</code> in which to search for forms
  1654. -- @return A list of forms.
  1655. function grab_forms(body)
  1656. local forms = {}
  1657. if not body then return forms end
  1658. local form_start_expr = '<%s*[Ff][Oo][Rr][Mm]'
  1659. local form_end_expr = '</%s*[Ff][Oo][Rr][Mm]>'
  1660. local form_opening = string.find(body, form_start_expr)
  1661. local forms = {}
  1662. while form_opening do
  1663. local form_closing = string.find(body, form_end_expr, form_opening+1)
  1664. if form_closing == nil then --html code contains errors
  1665. break
  1666. end
  1667. forms[#forms+1] = string.sub(body, form_opening, form_closing-1)
  1668. if form_closing+1 <= #body then
  1669. form_opening = string.find(body, form_start_expr, form_closing+1)
  1670. else
  1671. break
  1672. end
  1673. end
  1674. return forms
  1675. end
  1676. ---
  1677. -- Parses a form, that is, finds its action and fields.
  1678. -- @param form A plaintext representation of form
  1679. -- @return A dictionary with keys: <code>action</action>,
  1680. -- <code>method</code> if one is specified, <code>fields</code>
  1681. -- which is a list of fields found in the form each of which has a
  1682. -- <code>name</code> attribute and <code>type</code> if specified.
  1683. function parse_form(form)
  1684. local parsed = {}
  1685. local fields = {}
  1686. local form_action = string.match(form, '[Aa][Cc][Tt][Ii][Oo][Nn]=[\'"](.-)[\'"]')
  1687. if form_action then
  1688. parsed["action"] = form_action
  1689. else
  1690. return nil
  1691. end
  1692. -- determine if the form is using get or post
  1693. local form_method = string.match(form, '[Mm][Ee][Tt][Hh][Oo][Dd]=[\'"](.-)[\'"]')
  1694. if form_method then
  1695. parsed["method"] = string.lower(form_method)
  1696. end
  1697. -- get the id of the form
  1698. local form_id = string.match(form, '[iI][dD]=[\'"](.-)[\'"]')
  1699. if form_id then
  1700. parsed["id"] = string.lower(form_id)
  1701. end
  1702. -- now identify the fields
  1703. local input_type
  1704. local input_name
  1705. local input_value
  1706. -- first find regular inputs
  1707. for f in string.gmatch(form, '<%s*[Ii][Nn][Pp][Uu][Tt].->') do
  1708. input_type = string.match(f, '[Tt][Yy][Pp][Ee]=[\'"](.-)[\'"]')
  1709. input_name = string.match(f, '[Nn][Aa][Mm][Ee]=[\'"](.-)[\'"]')
  1710. input_value = string.match(f, '[Vv][Aa][Ll][Uu][Ee]=[\'"](.-)[\'"]')
  1711. local next_field_index = #fields+1
  1712. if input_name then
  1713. fields[next_field_index] = {}
  1714. fields[next_field_index]["name"] = input_name
  1715. if input_type then
  1716. fields[next_field_index]["type"] = string.lower(input_type)
  1717. end
  1718. if input_value then
  1719. fields[next_field_index]["value"] = input_value
  1720. end
  1721. end
  1722. end
  1723. -- now search for textareas
  1724. for f in string.gmatch(form, '<%s*[Tt][Ee][Xx][Tt][Aa][Rr][Ee][Aa].->') do
  1725. input_name = string.match(f, '[Nn][Aa][Mm][Ee]=[\'"](.-)[\'"]')
  1726. local next_field_index = #fields+1
  1727. if input_name then
  1728. fields[next_field_index] = {}
  1729. fields[next_field_index]["name"] = input_name
  1730. fields[next_field_index]["type"] = "textarea"
  1731. end
  1732. end
  1733. parsed["fields"] = fields
  1734. return parsed
  1735. end
  1736. local MONTH_MAP = {
  1737. Jan = 1, Feb = 2, Mar = 3, Apr = 4, May = 5, Jun = 6,
  1738. Jul = 7, Aug = 8, Sep = 9, Oct = 10, Nov = 11, Dec = 12
  1739. }
  1740. --- Parses an HTTP date string, in any of the following formats from section
  1741. -- 3.3.1 of RFC 2616:
  1742. -- * Sun, 06 Nov 1994 08:49:37 GMT (RFC 822, updated by RFC 1123)
  1743. -- * Sunday, 06-Nov-94 08:49:37 GMT (RFC 850, obsoleted by RFC 1036)
  1744. -- * Sun Nov 6 08:49:37 1994 (ANSI C's <code>asctime()</code> format)
  1745. -- @param s the date string.
  1746. -- @return a table with keys <code>year</code>, <code>month</code>,
  1747. -- <code>day</code>, <code>hour</code>, <code>min</code>, <code>sec</code>, and
  1748. -- <code>isdst</code>, relative to GMT, suitable for input to
  1749. -- <code>os.time</code>.
  1750. function parse_date(s)
  1751. local day, month, year, hour, min, sec, tz, month_name
  1752. -- Handle RFC 1123 and 1036 at once.
  1753. day, month_name, year, hour, min, sec, tz = s:match("^%w+, (%d+)[- ](%w+)[- ](%d+) (%d+):(%d+):(%d+) (%w+)$")
  1754. if not day then
  1755. month_name, day, hour, min, sec, year = s:match("%w+ (%w+) ?(%d+) (%d+):(%d+):(%d+) (%d+)")
  1756. tz = "GMT"
  1757. end
  1758. if not day then
  1759. stdnse.print_debug(1, "http.parse_date: can't parse date \"%s\": unknown format.", s)
  1760. return nil
  1761. end
  1762. -- Look up the numeric code for month.
  1763. month = MONTH_MAP[month_name]
  1764. if not month then
  1765. stdnse.print_debug(1, "http.parse_date: unknown month name \"%s\".", month_name)
  1766. return nil
  1767. end
  1768. if tz ~= "GMT" then
  1769. stdnse.print_debug(1, "http.parse_date: don't know time zone \"%s\", only \"GMT\".", tz)
  1770. return nil
  1771. end
  1772. day = tonumber(day)
  1773. year = tonumber(year)
  1774. hour = tonumber(hour)
  1775. min = tonumber(min)
  1776. sec = tonumber(sec)
  1777. if year < 100 then
  1778. -- Two-digit year. Make a guess.
  1779. if year < 70 then
  1780. year = year + 2000
  1781. else
  1782. year = year + 1900
  1783. end
  1784. end
  1785. return { year = year, month = month, day = day, hour = hour, min = min, sec = sec, isdst = false }
  1786. end
-- See RFC 2617, section 1.2. This function returns a table with keys "scheme"
-- and "params".
--
-- Reads one challenge starting at pos: a scheme token followed by zero or
-- more name=value auth-params separated by commas.
-- Returns the position following the challenge and the challenge table, or
-- nil if no scheme token can be read, a parameter value cannot be read, a
-- parameter name is repeated, or a comma is followed only by the end of the
-- string.
local function read_auth_challenge(s, pos)
  local _, scheme, params

  pos, scheme = read_token(s, pos)
  if not scheme then
    return nil
  end

  params = {}
  pos = skip_space(s, pos)
  while pos < #s do
    local name, val
    local tmp_pos

    -- We need to peek ahead at this point. It's possible that we've hit the
    -- end of one challenge and the beginning of another. Section 14.33 says
    -- that the header value can be 1#challenge, in other words several
    -- challenges separated by commas. Because the auth-params are also
    -- separated by commas, the only way we can tell is if we find a token not
    -- followed by an equals sign.
    -- tmp_pos is the look-ahead cursor; pos is only advanced once the
    -- "name =" prefix has been confirmed.
    tmp_pos = pos
    tmp_pos, name = read_token(s, tmp_pos)
    if not name then
      -- No token here: step over the offending byte and return the challenge.
      -- Note that params is reported as nil on this path, even if some
      -- parameters were already collected.
      pos = skip_space(s, pos + 1)
      return pos, { scheme = scheme, params = nil }
    end
    tmp_pos = skip_space(s, tmp_pos)
    if string.sub(s, tmp_pos, tmp_pos) ~= "=" then
      -- No equals sign, must be the beginning of another challenge.
      -- pos is left at the start of that token so the caller can resume there.
      break
    end
    -- Commit the look-ahead: continue just past the "=".
    tmp_pos = tmp_pos + 1
    pos = tmp_pos
    pos, val = read_token_or_quoted_string(s, pos)
    if not val then
      return nil
    end
    -- A repeated parameter name is treated as a parse error.
    if params[name] then
      return nil
    end
    params[name] = val
    pos = skip_space(s, pos)
    if string.sub(s, pos, pos) == "," then
      pos = skip_space(s, pos + 1)
      -- A trailing comma with nothing after it is an error.
      if pos > #s then
        return nil
      end
    end
  end

  return pos, { scheme = scheme, params = params }
end
  1837. ---Parses the WWW-Authenticate header as described in RFC 2616, section 14.47
  1838. -- and RFC 2617, section 1.2. The return value is an array of challenges. Each
  1839. -- challenge is a table with the keys <code>scheme</code> and
  1840. -- <code>params</code>.
  1841. -- @param s The header value text.
  1842. -- @return An array of challenges, or <code>nil</code> on error.
  1843. function parse_www_authenticate(s)
  1844. local challenges = {}
  1845. local pos
  1846. pos = 1
  1847. while pos <= #s do
  1848. local challenge
  1849. pos, challenge = read_auth_challenge(s, pos)
  1850. if not challenge then
  1851. return nil
  1852. end
  1853. challenges[#challenges + 1] = challenge
  1854. end
  1855. return challenges
  1856. end
  1857. ---Take the data returned from a HTTP request and return the status string.
  1858. -- Useful for <code>stdnse.print_debug</code> messages and even advanced output.
  1859. --
  1860. -- @param data The response table from any HTTP request
  1861. -- @return The best status string we could find: either the actual status string, the status code, or <code>"<unknown status>"</code>.
  1862. function get_status_string(data)
  1863. -- Make sure we have valid data
  1864. if(data == nil) then
  1865. return "<unknown status>"
  1866. elseif(data['status-line'] == nil) then
  1867. if(data['status'] ~= nil) then
  1868. return data['status']
  1869. end
  1870. return "<unknown status>"
  1871. end
  1872. -- We basically want everything after the space
  1873. local space = string.find(data['status-line'], ' ')
  1874. if(space == nil) then
  1875. return data['status-line']
  1876. else
  1877. return (string.sub(data['status-line'], space + 1)):gsub('\r?\n', '')
  1878. end
  1879. end
  1880. ---Determine whether or not the server supports HEAD by requesting / and
  1881. -- verifying that it returns 200, and doesn't return data. We implement the
  1882. -- check like this because can't always rely on OPTIONS to tell the truth.
  1883. --
  1884. -- Note: If <code>identify_404</code> returns a 200 status, HEAD requests
  1885. -- should be disabled. Sometimes, servers use a 200 status code with a message
  1886. -- explaining that the page wasn't found. In this case, to actually identify
  1887. -- a 404 page, we need the full body that a HEAD request doesn't supply.
  1888. -- This is determined automatically if the <code>result_404</code> field is
  1889. -- set.
  1890. --
  1891. -- @param host The host object.
  1892. -- @param port The port to use.
  1893. -- @param result_404 [optional] The result when an unknown page is requested.
  1894. -- This is returned by <code>identify_404</code>. If the 404 page returns a
  1895. -- 200 code, then we disable HEAD requests.
  1896. -- @param path The path to request; by default, / is used.
  1897. -- @return A boolean value: true if HEAD is usable, false otherwise.
  1898. -- @return If HEAD is usable, the result of the HEAD request is returned (so
  1899. -- potentially, a script can avoid an extra call to HEAD
  1900. function can_use_head(host, port, result_404, path)
  1901. -- If the 404 result is 200, don't use HEAD.
  1902. if(result_404 == 200) then
  1903. return false
  1904. end
  1905. -- Default path
  1906. if(path == nil) then
  1907. path = '/'
  1908. end
  1909. -- Perform a HEAD request and see what happens.
  1910. local data = head( host, port, path )
  1911. if data then
  1912. if data.status and data.status == 302 and data.header and data.header.location then
  1913. stdnse.print_debug(1, "HTTP: Warning: Host returned 302 and not 200 when performing HEAD.")
  1914. return false
  1915. end
  1916. if data.status and data.status == 200 and data.header then
  1917. -- check that a body wasn't returned
  1918. if #data.body > 0 then
  1919. stdnse.print_debug(1, "HTTP: Warning: Host returned data when performing HEAD.")
  1920. return false
  1921. end
  1922. stdnse.print_debug(1, "HTTP: Host supports HEAD.")
  1923. return true, data
  1924. end
  1925. stdnse.print_debug(1, "HTTP: Didn't receive expected response to HEAD request (got %s).", get_status_string(data))
  1926. return false
  1927. end
  1928. stdnse.print_debug(1, "HTTP: HEAD request completely failed.")
  1929. return false
  1930. end
  1931. --- Try and remove anything that might change within a 404. For example:
  1932. -- * A file path (includes URI)
  1933. -- * A time
  1934. -- * A date
  1935. -- * An execution time (numbers in general, really)
  1936. --
  1937. -- The intention is that two 404 pages from different URIs and taken hours
  1938. -- apart should, whenever possible, look the same.
  1939. --
  1940. -- During this function, we're likely going to over-trim things. This is fine
  1941. -- -- we want enough to match on that it'll a) be unique, and b) have the best
  1942. -- chance of not changing. Even if we remove bits and pieces from the file, as
  1943. -- long as it isn't a significant amount, it'll remain unique.
  1944. --
  1945. -- One case this doesn't cover is if the server generates a random haiku for
  1946. -- the user.
  1947. --
  1948. -- @param body The body of the page.
  1949. function clean_404(body)
  1950. if ( not(body) ) then
  1951. return
  1952. end
  1953. -- Remove anything that looks like time
  1954. body = string.gsub(body, '%d?%d:%d%d:%d%d', "")
  1955. body = string.gsub(body, '%d%d:%d%d', "")
  1956. body = string.gsub(body, 'AM', "")
  1957. body = string.gsub(body, 'am', "")
  1958. body = string.gsub(body, 'PM', "")
  1959. body = string.gsub(body, 'pm', "")
  1960. -- Remove anything that looks like a date (this includes 6 and 8 digit numbers)
  1961. -- (this is probably unnecessary, but it's getting pretty close to 11:59 right now, so you never know!)
  1962. body = string.gsub(body, '%d%d%d%d%d%d%d%d', "") -- 4-digit year (has to go first, because it overlaps 2-digit year)
  1963. body = string.gsub(body, '%d%d%d%d%-%d%d%-%d%d', "")
  1964. body = string.gsub(body, '%d%d%d%d/%d%d/%d%d', "")
  1965. body = string.gsub(body, '%d%d%-%d%d%-%d%d%d%d', "")
  1966. body = string.gsub(body, '%d%d%/%d%d%/%d%d%d%d', "")
  1967. body = string.gsub(body, '%d%d%d%d%d%d', "") -- 2-digit year
  1968. body = string.gsub(body, '%d%d%-%d%d%-%d%d', "")
  1969. body = string.gsub(body, '%d%d%/%d%d%/%d%d', "")
  1970. -- Remove anything that looks like a path (note: this will get the URI too) (note2: this interferes with the date removal above, so it can't be moved up)
  1971. body = string.gsub(body, "/[^ ]+", "") -- Unix - remove everything from a slash till the next space
  1972. body = string.gsub(body, "[a-zA-Z]:\\[^ ]+", "") -- Windows - remove everything from a "x:\" pattern till the next space
  1973. -- If we have SSL available, save us a lot of memory by hashing the page (if SSL isn't available, this will work fine, but
  1974. -- take up more memory). If we're debugging, don't hash (it makes things far harder to debug).
  1975. if(have_ssl and nmap.debugging() == 0) then
  1976. return openssl.md5(body)
  1977. end
  1978. return body
  1979. end
  1980. ---Try requesting a non-existent file to determine how the server responds to
  1981. -- unknown pages ("404 pages"), which a) tells us what to expect when a
  1982. -- non-existent page is requested, and b) tells us if the server will be
  1983. -- impossible to scan. If the server responds with a 404 status code, as it is
  1984. -- supposed to, then this function simply returns 404. If it contains one of a
  1985. -- series of common status codes, including unauthorized, moved, and others, it
  1986. -- is returned like a 404.
  1987. --
  1988. -- I (Ron Bowes) have observed one host that responds differently for three
  1989. -- scenarios:
  1990. -- * A non-existent page, all lowercase (a login page)
  1991. -- * A non-existent page, with uppercase (a weird error page that says, "Filesystem is corrupt.")
  1992. -- * A page in a non-existent directory (a login page with different font colours)
  1993. --
  1994. -- As a result, I've devised three different 404 tests, one to check each of
  1995. -- these conditions. They all have to match, the tests can proceed; if any of
  1996. -- them are different, we can't check 404s properly.
  1997. --
  1998. -- @param host The host object.
  1999. -- @param port The port to which we are establishing the connection.
  2000. -- @return status Did we succeed?
  2001. -- @return result If status is false, result is an error message. Otherwise, it's the code to expect (typically, but not necessarily, '404').
  2002. -- @return body Body is a hash of the cleaned-up body that can be used when detecting a 404 page that doesn't return a 404 error code.
  2003. function identify_404(host, port)
  2004. local data
  2005. local bad_responses = { 301, 302, 400, 401, 403, 499, 501, 503 }
  2006. -- The URLs used to check 404s
  2007. local URL_404_1 = '/nmaplowercheck' .. os.time(os.date('*t'))
  2008. local URL_404_2 = '/NmapUpperCheck' .. os.time(os.date('*t'))
  2009. local URL_404_3 = '/Nmap/folder/check' .. os.time(os.date('*t'))
  2010. data = get(host, port, URL_404_1)
  2011. if(data == nil) then
  2012. stdnse.print_debug(1, "HTTP: Failed while testing for 404 status code")
  2013. return false, "Failed while testing for 404 error message"
  2014. end
  2015. if(data.status and data.status == 404) then
  2016. stdnse.print_debug(1, "HTTP: Host returns proper 404 result.")
  2017. return true, 404
  2018. end
  2019. if(data.status and data.status == 200) then
  2020. stdnse.print_debug(1, "HTTP: Host returns 200 instead of 404.")
  2021. -- Clean up the body (for example, remove the URI). This makes it easier to validate later
  2022. if(data.body) then
  2023. -- Obtain a couple more 404 pages to test different conditions
  2024. local data2 = get(host, port, URL_404_2)
  2025. local data3 = get(host, port, URL_404_3)
  2026. if(data2 == nil or data3 == nil) then
  2027. stdnse.print_debug(1, "HTTP: Failed while testing for extra 404 error messages")
  2028. return false, "Failed while testing for extra 404 error messages"
  2029. end
  2030. -- Check if the return code became something other than 200.
  2031. -- Status code: -1 represents unknown.
  2032. -- If the status is nil or the string "unknown" we switch to -1.
  2033. if(data2.status ~= 200) then
  2034. if(type(data2.status) ~= "number") then
  2035. data2.status = -1
  2036. end
  2037. stdnse.print_debug(1, "HTTP: HTTP 404 status changed for second request (became %d).", data2.status)
  2038. return false, string.format("HTTP 404 status changed for second request (became %d).", data2.status)
  2039. end
  2040. -- Check if the return code became something other than 200
  2041. if(data3.status ~= 200) then
  2042. if(type(data3.status) ~= "number") then
  2043. data3.status = -1
  2044. end
  2045. stdnse.print_debug(1, "HTTP: HTTP 404 status changed for third request (became %d).", data3.status)
  2046. return false, string.format("HTTP 404 status changed for third request (became %d).", data3.status)
  2047. end
  2048. -- Check if the returned bodies (once cleaned up) matches the first returned body
  2049. local clean_body = clean_404(data.body)
  2050. local clean_body2 = clean_404(data2.body)
  2051. local clean_body3 = clean_404(data3.body)
  2052. if(clean_body ~= clean_body2) then
  2053. stdnse.print_debug(1, "HTTP: Two known 404 pages returned valid and different pages; unable to identify valid response.")
  2054. stdnse.print_debug(1, "HTTP: If you investigate the server and it's possible to clean up the pages, please post to nmap-dev mailing list.")
  2055. return false, string.format("Two known 404 pages returned valid and different pages; unable to identify valid response.")
  2056. end
  2057. if(clean_body ~= clean_body3) then
  2058. stdnse.print_debug(1, "HTTP: Two known 404 pages returned valid and different pages; unable to identify valid response (happened when checking a folder).")
  2059. stdnse.print_debug(1, "HTTP: If you investigate the server and it's possible to clean up the pages, please post to nmap-dev mailing list.")
  2060. return false, string.format("Two known 404 pages returned valid and different pages; unable to identify valid response (happened when checking a folder).")
  2061. end
  2062. return true, 200, clean_body
  2063. end
  2064. stdnse.print_debug(1, "HTTP: The 200 response didn't contain a body.")
  2065. return true, 200
  2066. end
  2067. -- Loop through any expected error codes
  2068. for _,code in pairs(bad_responses) do
  2069. if(data.status and data.status == code) then
  2070. stdnse.print_debug(1, "HTTP: Host returns %s instead of 404 File Not Found.", get_status_string(data))
  2071. return true, code
  2072. end
  2073. end
  2074. stdnse.print_debug(1, "Unexpected response returned for 404 check: %s", get_status_string(data))
  2075. return true, data.status
  2076. end
  2077. --- Determine whether or not the page that was returned is a 404 page. This is
  2078. --actually a pretty simple function, but it's best to keep this logic close to
  2079. --<code>identify_404</code>, since they will generally be used together.
  2080. --
  2081. -- @param data The data returned by the HTTP request
  2082. -- @param result_404 The status code to expect for non-existent pages. This is returned by <code>identify_404</code>.
  2083. -- @param known_404 The 404 page itself, if <code>result_404</code> is 200. If <code>result_404</code> is something else, this parameter is ignored and can be set to <code>nil</code>. This is returned by <code>identify_404</code>.
  2084. -- @param page The page being requested (used in error messages).
  2085. -- @param displayall [optional] If set to true, don't exclude non-404 errors (such as 500).
  2086. -- @return A boolean value: true if the page appears to exist, and false if it does not.
  2087. function page_exists(data, result_404, known_404, page, displayall)
  2088. if(data and data.status) then
  2089. -- Handle the most complicated case first: the "200 Ok" response
  2090. if(data.status == 200) then
  2091. if(result_404 == 200) then
  2092. -- If the 404 response is also "200", deal with it (check if the body matches)
  2093. if(#data.body == 0) then
  2094. -- I observed one server that returned a blank string instead of an error, on some occasions
  2095. stdnse.print_debug(1, "HTTP: Page returned a totally empty body; page likely doesn't exist")
  2096. return false
  2097. elseif(clean_404(data.body) ~= known_404) then
  2098. stdnse.print_debug(1, "HTTP: Page returned a body that doesn't match known 404 body, therefore it exists (%s)", page)
  2099. return true
  2100. else
  2101. return false
  2102. end
  2103. else
  2104. -- If 404s return something other than 200, and we got a 200, we're good to go
  2105. stdnse.print_debug(1, "HTTP: Page was '%s', it exists! (%s)", get_status_string(data), page)
  2106. return true
  2107. end
  2108. else
  2109. -- If the result isn't a 200, check if it's a 404 or returns the same code as a 404 returned
  2110. if(data.status ~= 404 and data.status ~= result_404) then
  2111. -- If this check succeeded, then the page isn't a standard 404 -- it could be a redirect, authentication request, etc. Unless the user
  2112. -- asks for everything (with a script argument), only display 401 Authentication Required here.
  2113. stdnse.print_debug(1, "HTTP: Page didn't match the 404 response (%s) (%s)", get_status_string(data), page)
  2114. if(data.status == 401) then -- "Authentication Required"
  2115. return true
  2116. elseif(displayall) then
  2117. return true
  2118. end
  2119. return false
  2120. else
  2121. -- Page was a 404, or looked like a 404
  2122. return false
  2123. end
  2124. end
  2125. else
  2126. stdnse.print_debug(1, "HTTP: HTTP request failed (is the host still up?)")
  2127. return false
  2128. end
  2129. end
  2130. ---Check if the response variable, which could be a return from a http.get, http.post, http.pipeline,
  2131. -- etc, contains the given text. The text can be:
  2132. -- * Part of a header ('content-type', 'text/html', '200 OK', etc)
  2133. -- * An entire header ('Content-type: text/html', 'Content-length: 123', etc)
  2134. -- * Part of the body
  2135. --
  2136. -- The search text is treated as a Lua pattern.
  2137. --
  2138. --@param response The full response table from a HTTP request.
  2139. --@param pattern The pattern we're searching for. Don't forget to escape '-', for example, 'Content%-type'. The pattern can also contain captures, like 'abc(.*)def', which will be returned if successful.
  2140. --@param case_sensitive [optional] Set to <code>true</code> for case-sensitive searches. Default: not case sensitive.
  2141. --@return result True if the string matched, false otherwise
  2142. --@return matches An array of captures from the match, if any
  2143. function response_contains(response, pattern, case_sensitive)
  2144. local result, _
  2145. local m = {}
  2146. -- If they're searching for the empty string or nil, it's true
  2147. if(pattern == '' or pattern == nil) then
  2148. return true
  2149. end
  2150. -- Create a function that either lowercases everything or doesn't, depending on case sensitivity
  2151. local case = function(pattern) return string.lower(pattern or '') end
  2152. if(case_sensitive == true) then
  2153. case = function(pattern) return (pattern or '') end
  2154. end
  2155. -- Set the case of the pattern
  2156. pattern = case(pattern)
  2157. -- Check the status line (eg, 'HTTP/1.1 200 OK')
  2158. m = {string.match(case(response['status-line']), pattern)};
  2159. if(m and #m > 0) then
  2160. return true, m
  2161. end
  2162. -- Check the headers
  2163. for _, header in pairs(response['rawheader']) do
  2164. m = {string.match(case(header), pattern)}
  2165. if(m and #m > 0) then
  2166. return true, m
  2167. end
  2168. end
  2169. -- Check the body
  2170. m = {string.match(case(response['body']), pattern)}
  2171. if(m and #m > 0) then
  2172. return true, m
  2173. end
  2174. return false
  2175. end
  2176. ---Take a URI or URL in any form and convert it to its component parts. The URL can optionally
  2177. -- have a protocol definition ('http://'), a server ('scanme.insecure.org'), a port (':80'), a
  2178. -- URI ('/test/file.php'), and a query string ('?username=ron&password=turtle'). At the minimum,
  2179. -- a path or protocol and url are required.
  2180. --
  2181. --@param url The incoming URL to parse
  2182. --@return result A table containing the result, which can have the following fields: protocol, hostname, port, uri, querystring. All fields are strings except querystring, which is a table containing name=value pairs.
  2183. function parse_url(url)
  2184. local result = {}
  2185. -- Save the original URL
  2186. result['original'] = url
  2187. -- Split the protocol off, if it exists
  2188. local colonslashslash = string.find(url, '://')
  2189. if(colonslashslash) then
  2190. result['protocol'] = string.sub(url, 1, colonslashslash - 1)
  2191. url = string.sub(url, colonslashslash + 3)
  2192. end
  2193. -- Split the host:port from the path
  2194. local slash, host_port
  2195. slash = string.find(url, '/')
  2196. if(slash) then
  2197. host_port = string.sub(url, 1, slash - 1)
  2198. result['path_query'] = string.sub(url, slash)
  2199. else
  2200. -- If there's no slash, then it's just a URL (if it has a http://) or a path (if it doesn't)
  2201. if(result['protocol']) then
  2202. result['host_port'] = url
  2203. else
  2204. result['path_query'] = url
  2205. end
  2206. end
  2207. if(host_port == '') then
  2208. host_port = nil
  2209. end
  2210. -- Split the host and port apart, if possible
  2211. if(host_port) then
  2212. local colon = string.find(host_port, ':')
  2213. if(colon) then
  2214. result['host'] = string.sub(host_port, 1, colon - 1)
  2215. result['port'] = tonumber(string.sub(host_port, colon + 1))
  2216. else
  2217. result['host'] = host_port
  2218. end
  2219. end
  2220. -- Split the path and querystring apart
  2221. if(result['path_query']) then
  2222. local question = string.find(result['path_query'], '?')
  2223. if(question) then
  2224. result['path'] = string.sub(result['path_query'], 1, question - 1)
  2225. result['raw_querystring'] = string.sub(result['path_query'], question + 1)
  2226. else
  2227. result['path'] = result['path_query']
  2228. end
  2229. -- Split up the query, if necessary
  2230. if(result['raw_querystring']) then
  2231. result['querystring'] = {}
  2232. local values = stdnse.strsplit('&', result['raw_querystring'])
  2233. for i, v in ipairs(values) do
  2234. local name, value = table.unpack(stdnse.strsplit('=', v))
  2235. result['querystring'][name] = value
  2236. end
  2237. end
  2238. -- Get the extension of the file, if any, or set that it's a folder
  2239. if(string.match(result['path'], "/$")) then
  2240. result['is_folder'] = true
  2241. else
  2242. result['is_folder'] = false
  2243. local split_str = stdnse.strsplit('%.', result['path'])
  2244. if(split_str and #split_str > 1) then
  2245. result['extension'] = split_str[#split_str]
  2246. end
  2247. end
  2248. end
  2249. return result
  2250. end
  2251. ---This function should be called whenever a valid path (a path that doesn't contain a known
  2252. -- 404 page) is discovered. It will add the path to the registry in several ways, allowing
  2253. -- other scripts to take advantage of it in interesting ways.
  2254. --
  2255. --@param host The host the path was discovered on (not necessarily the host being scanned).
  2256. --@param port The port the path was discovered on (not necessarily the port being scanned).
  2257. --@param path The path discovered. Calling this more than once with the same path is okay; it'll update the data as much as possible instead of adding a duplicate entry
  2258. --@param status [optional] The status code (200, 404, 500, etc). This can be left off if it isn't known.
  2259. --@param links_to [optional] A table of paths that this page links to.
  2260. --@param linked_from [optional] A table of paths that link to this page.
  2261. --@param contenttype [optional] The content-type value for the path, if it's known.
  2262. function save_path(host, port, path, status, links_to, linked_from, contenttype)
  2263. -- Make sure we have a proper hostname and port
  2264. host = stdnse.get_hostname(host)
  2265. if(type(port) == 'table') then
  2266. port = port.number
  2267. end
  2268. -- Parse the path
  2269. local parsed = parse_url(path)
  2270. -- Add to the 'all_pages' key
  2271. stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'all_pages'}, parsed['path'])
  2272. -- Add the URL with querystring to all_pages_full_query
  2273. stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'all_pages_full_query'}, parsed['path_query'])
  2274. -- Add the URL to a key matching the response code
  2275. if(status) then
  2276. stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'status_codes', status}, parsed['path'])
  2277. end
  2278. -- If it's a directory, add it to the directories list; otherwise, add it to the files list
  2279. if(parsed['is_folder']) then
  2280. stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'directories'}, parsed['path'])
  2281. else
  2282. stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'files'}, parsed['path'])
  2283. end
  2284. -- If we have an extension, add it to the extensions key
  2285. if(parsed['extension']) then
  2286. stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'extensions', parsed['extension']}, parsed['path'])
  2287. end
  2288. -- Add an entry for the page and its arguments
  2289. if(parsed['querystring']) then
  2290. -- Add all scripts with a querystring to the 'cgi' and 'cgi_full_query' keys
  2291. stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'cgi'}, parsed['path'])
  2292. stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'cgi_full_query'}, parsed['path_query'])
  2293. -- Add the query string alone to the registry (probably not necessary)
  2294. stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'cgi_querystring', parsed['path'] }, parsed['raw_querystring'])
  2295. -- Add the individual arguments for the page, along with their values
  2296. for key, value in pairs(parsed['querystring']) do
  2297. stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'cgi_args', parsed['path']}, parsed['querystring'])
  2298. end
  2299. end
  2300. -- Save the pages it links to
  2301. if(links_to) then
  2302. if(type(links_to) == 'string') then
  2303. links_to = {links_to}
  2304. end
  2305. for _, v in ipairs(links_to) do
  2306. stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'links_to', parsed['path_query']}, v)
  2307. end
  2308. end
  2309. -- Save the pages it's linked from (we save these in the 'links_to' key, reversed)
  2310. if(linked_from) then
  2311. if(type(linked_from) == 'string') then
  2312. linked_from = {linked_from}
  2313. end
  2314. for _, v in ipairs(linked_from) do
  2315. stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'links_to', v}, parsed['path_query'])
  2316. end
  2317. end
  2318. -- Save it as a content-type, if we have one
  2319. if(contenttype) then
  2320. stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'content-type', contenttype}, parsed['path_query'])
  2321. end
  2322. end
  2323. local function get_default_timeout( nmap_timing )
  2324. local timeout = {}
  2325. if nmap_timing >= 0 and nmap_timing <= 3 then
  2326. timeout.connect = 10000
  2327. timeout.request = 15000
  2328. end
  2329. if nmap_timing >= 4 then
  2330. timeout.connect = 5000
  2331. timeout.request = 10000
  2332. end
  2333. if nmap_timing >= 5 then
  2334. timeout.request = 7000
  2335. end
  2336. return timeout
  2337. end
  2338. return _ENV;