PageRenderTime 59ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 1ms

/mordor/uri.rl

http://github.com/mozy/mordor
Unknown | 1098 lines | 980 code | 118 blank | 0 comment | 0 complexity | 3bc095e486c515bba2d2b59b27e264e4 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. // Copyright (c) 2009 - Mozy, Inc.
  2. /* To compile to .cpp:
  3. ragel uri.rl -G2 -o uri.cpp
  4. */
  5. #include "mordor/pch.h"
  6. #include "mordor/uri.h"
  7. #include <sstream>
  8. #include "mordor/ragel.h"
  9. #include "mordor/string.h"
  10. #include "mordor/version.h"
  11. namespace Mordor {
  12. static const std::string unreserved("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-._~");
  13. static const std::string sub_delims("!$&'()*+,;=");
  14. static const std::string scheme("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+-.");
  15. static const std::string userinfo("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-._~" "!$&'()*+,;=" ":");
  16. static const std::string host("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-._~" "!$&'()*+,;=" ":");
  17. static const std::string pchar("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-._~" "!$&'()*+,;=" ":@");
  18. static const std::string path("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-._~" "!$&'()*+,;=" ":@" "/");
  19. static const std::string segment_nc("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-._~" "!$&'()*+,;=" "@");
  20. static const std::string query("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-._~" "!$&'()*+,;=" ":@" "/?");
  21. static const std::string queryString("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-._~" "!$'()*," ":@" "/?");
  22. static std::string escape(const std::string& str, const std::string& allowedChars, bool spaceAsPlus = false)
  23. {
  24. const char *hexdigits = "0123456789ABCDEF";
  25. std::string result(str);
  26. const char *c = str.c_str();
  27. const char *end = c + str.length();
  28. bool differed = false;
  29. while(c < end)
  30. {
  31. if (allowedChars.find(*c) == std::string::npos) {
  32. if (!differed) {
  33. result.erase(c - str.c_str());
  34. differed = true;
  35. }
  36. if (*c == ' ' && spaceAsPlus) {
  37. result.append(1, '+');
  38. } else {
  39. result.append(1, '%');
  40. result.append(1, hexdigits[(unsigned char)*c >> 4]);
  41. result.append(1, hexdigits[*c & 0xf]);
  42. }
  43. } else {
  44. if (differed) {
  45. result.append(1, *c);
  46. }
  47. }
  48. ++c;
  49. }
  50. if (differed) {
  51. MORDOR_ASSERT(result.length() >= str.length());
  52. } else {
  53. MORDOR_ASSERT(result == str);
  54. }
  55. return result;
  56. }
  57. std::string unescape(const std::string& str, bool spaceAsPlus = false)
  58. {
  59. std::string result = str;
  60. const char *c = str.c_str();
  61. const char *end = c + str.length();
  62. bool differed = false;
  63. while (c < end)
  64. {
  65. if (*c == '%') {
  66. if (c + 2 >= end)
  67. MORDOR_THROW_EXCEPTION(std::invalid_argument("str"));
  68. if (!differed) {
  69. result.erase(c - str.c_str());
  70. differed = true;
  71. }
  72. char decoded;
  73. ++c;
  74. if (*c >= 'a' && *c <= 'f')
  75. decoded = (*c - 'a' + 10) << 4;
  76. else if (*c >= 'A' && *c <= 'F')
  77. decoded = (*c - 'A' + 10) << 4;
  78. else {
  79. if (*c < '0' || *c > '9')
  80. MORDOR_THROW_EXCEPTION(std::invalid_argument("str"));
  81. decoded = (*c - '0') << 4;
  82. }
  83. ++c;
  84. if (*c >= 'a' && *c <= 'f')
  85. decoded |= *c - 'a' + 10;
  86. else if (*c >= 'A' && *c <= 'F')
  87. decoded |= *c - 'A' + 10;
  88. else {
  89. if (*c < '0' || *c > '9')
  90. MORDOR_THROW_EXCEPTION(std::invalid_argument("str"));
  91. decoded |= *c - '0';
  92. }
  93. result.append(1, decoded);
  94. } else if (*c == '+' && spaceAsPlus) {
  95. if (!differed) {
  96. result.erase(c - str.c_str());
  97. differed = true;
  98. }
  99. result.append(1, ' ');
  100. } else if (differed) {
  101. result.append(1, *c);
  102. }
  103. ++c;
  104. }
  105. return result;
  106. }
  107. std::string
  108. URI::encode(const std::string &str, CharacterClass charClass)
  109. {
  110. switch (charClass) {
  111. case UNRESERVED:
  112. return escape(str, unreserved, false);
  113. case QUERYSTRING:
  114. return escape(str, Mordor::queryString, true);
  115. default:
  116. MORDOR_NOTREACHED();
  117. }
  118. }
  119. std::string
  120. URI::decode(const std::string &str, CharacterClass charClass)
  121. {
  122. switch (charClass) {
  123. case UNRESERVED:
  124. return unescape(str, false);
  125. case QUERYSTRING:
  126. return unescape(str, true);
  127. default:
  128. MORDOR_NOTREACHED();
  129. }
  130. }
  131. %%{
  132. # See RFC 3986: http://www.ietf.org/rfc/rfc3986.txt
  133. machine uri_parser;
  134. gen_delims = ":" | "/" | "?" | "#" | "[" | "]" | "@";
  135. sub_delims = "!" | "$" | "&" | "'" | "(" | ")" | "*" | "+" | "," | ";" | "=";
  136. reserved = gen_delims | sub_delims;
  137. unreserved = alpha | digit | "-" | "." | "_" | "~";
  138. pct_encoded = "%" xdigit xdigit;
  139. action marku { mark = fpc; }
  140. action markh { mark = fpc; }
  141. action save_scheme
  142. {
  143. m_uri->scheme(unescape(std::string(mark, fpc - mark)));
  144. mark = NULL;
  145. }
  146. scheme = (alpha (alpha | digit | "+" | "-" | ".")*) >marku %save_scheme;
  147. action save_port
  148. {
  149. if (fpc == mark)
  150. m_authority->port(-1);
  151. else
  152. m_authority->port(atoi(mark));
  153. mark = NULL;
  154. }
  155. action save_userinfo
  156. {
  157. m_authority->userinfo(unescape(std::string(mark, fpc - mark)));
  158. mark = NULL;
  159. }
  160. action save_host
  161. {
  162. if (mark != NULL) {
  163. m_authority->host(unescape(std::string(mark, fpc - mark)));
  164. mark = NULL;
  165. }
  166. }
  167. userinfo = (unreserved | pct_encoded | sub_delims | ":")*;
  168. dec_octet = digit | [1-9] digit | "1" digit{2} | 2 [0-4] digit | "25" [0-5];
  169. IPv4address = dec_octet "." dec_octet "." dec_octet "." dec_octet;
  170. h16 = xdigit{1,4};
  171. ls32 = (h16 ":" h16) | IPv4address;
  172. IPv6address = ( (h16 ":"){6} ls32) |
  173. ( "::" (h16 ":"){5} ls32) |
  174. (( h16)? "::" (h16 ":"){4} ls32) |
  175. (((h16 ":"){1} h16)? "::" (h16 ":"){3} ls32) |
  176. (((h16 ":"){2} h16)? "::" (h16 ":"){2} ls32) |
  177. (((h16 ":"){3} h16)? "::" (h16 ":"){1} ls32) |
  178. (((h16 ":"){4} h16)? "::" ls32) |
  179. (((h16 ":"){5} h16)? "::" h16 ) |
  180. (((h16 ":"){6} h16)? "::" );
  181. IPvFuture = "v" xdigit+ "." (unreserved | sub_delims | ":")+;
  182. IP_literal = "[" (IPv6address | IPvFuture) "]";
  183. reg_name = (unreserved | pct_encoded | sub_delims)*;
  184. host = IP_literal | IPv4address | reg_name;
  185. port = digit*;
  186. authority = ( (userinfo %save_userinfo "@")? host >markh %save_host (":" port >markh %save_port)? ) >markh;
  187. action save_segment
  188. {
  189. m_segments->push_back(unescape(std::string(mark, fpc - mark)));
  190. mark = NULL;
  191. }
  192. pchar = unreserved | pct_encoded | sub_delims | ":" | "@";
  193. segment = pchar* >marku %save_segment;
  194. segment_nz = pchar+ >marku %save_segment;
  195. segment_nz_nc = (pchar - ":")+ >marku %save_segment;
  196. action clear_segments
  197. {
  198. m_segments->clear();
  199. }
  200. path_abempty = (("/" >marku >save_segment segment) %marku %save_segment)? ("/" segment)*;
  201. path_absolute = ("/" >marku >save_segment (segment_nz ("/" segment)*)?) %marku %save_segment;
  202. path_noscheme = segment_nz_nc ("/" segment)*;
  203. path_rootless = segment_nz ("/" segment)*;
  204. path_empty = "";
  205. path = (path_abempty | path_absolute | path_noscheme | path_rootless | path_empty);
  206. action save_query
  207. {
  208. m_uri->m_query = std::string(mark, fpc - mark);
  209. m_uri->m_queryDefined = true;
  210. mark = NULL;
  211. }
  212. action save_fragment
  213. {
  214. m_uri->fragment(unescape(std::string(mark, fpc - mark)));
  215. mark = NULL;
  216. }
  217. query = (pchar | "/" | "?")* >marku %save_query;
  218. fragment = (pchar | "/" | "?")* >marku %save_fragment;
  219. hier_part = ("//" %clear_segments authority path_abempty) | path_absolute | path_rootless | path_empty;
  220. relative_part = ("//" %clear_segments authority path_abempty) | path_absolute | path_noscheme | path_empty;
  221. relative_ref = relative_part ( "?" query )? ( "#" fragment )?;
  222. absolute_URI = scheme ":" hier_part ( "?" query )? ;
  223. # Obsolete, but referenced from HTTP, so we translate
  224. relative_URI = relative_part ( "?" query )?;
  225. URI = scheme ":" hier_part ( "?" query )? ( "#" fragment )?;
  226. URI_reference = URI | relative_ref;
  227. }%%
  228. %%{
  229. machine uri_parser_proper;
  230. include uri_parser;
  231. main := URI_reference;
  232. write data;
  233. }%%
  234. class URIParser : public RagelParser
  235. {
  236. public:
  237. URIParser(URI& uri)
  238. {
  239. m_uri = &uri;
  240. m_segments = &m_uri->path.segments;
  241. m_authority = &m_uri->authority;
  242. }
  243. void init()
  244. {
  245. RagelParser::init();
  246. %% write init;
  247. }
  248. protected:
  249. void exec()
  250. {
  251. #ifdef MSVC
  252. #pragma warning(push)
  253. #pragma warning(disable : 4244)
  254. #endif
  255. %% write exec;
  256. #ifdef MSVC
  257. #pragma warning(pop)
  258. #endif
  259. }
  260. public:
  261. bool complete() const
  262. {
  263. return false;
  264. }
  265. bool final() const
  266. {
  267. return cs >= uri_parser_proper_first_final;
  268. }
  269. bool error() const
  270. {
  271. return cs == uri_parser_proper_error;
  272. }
  273. private:
  274. URI *m_uri;
  275. std::vector<std::string> *m_segments;
  276. URI::Authority *m_authority;
  277. };
  278. %%{
  279. machine uri_path_parser;
  280. include uri_parser;
  281. main := path;
  282. write data;
  283. }%%
  284. class URIPathParser : public RagelParser
  285. {
  286. public:
  287. URIPathParser(std::vector<std::string> &segments)
  288. {
  289. m_segments = &segments;
  290. }
  291. void init()
  292. {
  293. RagelParser::init();
  294. %% write init;
  295. }
  296. protected:
  297. void exec()
  298. {
  299. #ifdef MSVC
  300. #pragma warning(push)
  301. #pragma warning(disable : 4244)
  302. #endif
  303. %% write exec;
  304. #ifdef MSVC
  305. #pragma warning(pop)
  306. #endif
  307. }
  308. public:
  309. bool complete() const
  310. {
  311. return false;
  312. }
  313. bool final() const
  314. {
  315. return cs >= uri_path_parser_first_final;
  316. }
  317. bool error() const
  318. {
  319. return cs == uri_path_parser_error;
  320. }
  321. private:
  322. std::vector<std::string> *m_segments;
  323. };
  324. %%{
  325. machine uri_authority_parser;
  326. include uri_parser;
  327. main := authority;
  328. write data;
  329. }%%
  330. class URIAuthorityParser : public RagelParser
  331. {
  332. public:
  333. URIAuthorityParser(URI::Authority &authority)
  334. {
  335. m_authority = &authority;
  336. }
  337. void init()
  338. {
  339. RagelParser::init();
  340. %% write init;
  341. }
  342. protected:
  343. void exec()
  344. {
  345. #ifdef MSVC
  346. #pragma warning(push)
  347. #pragma warning(disable : 4244)
  348. #endif
  349. %% write exec;
  350. #ifdef MSVC
  351. #pragma warning(pop)
  352. #endif
  353. }
  354. public:
  355. bool complete() const
  356. {
  357. return false;
  358. }
  359. bool final() const
  360. {
  361. return cs >= uri_authority_parser_first_final;
  362. }
  363. bool error() const
  364. {
  365. return cs == uri_authority_parser_error;
  366. }
  367. private:
  368. URI::Authority *m_authority;
  369. };
  370. #ifdef MSVC
  371. #pragma warning(push)
  372. #pragma warning(disable: 4355)
  373. #endif
  374. URI::URI()
  375. : path(*this)
  376. {
  377. reset();
  378. }
  379. URI::URI(const std::string& uri)
  380. : path(*this)
  381. {
  382. reset();
  383. *this = uri;
  384. }
  385. URI::URI(const char *uri)
  386. : path(*this)
  387. {
  388. reset();
  389. *this = uri;
  390. }
  391. URI::URI(const Buffer &uri)
  392. : path(*this)
  393. {
  394. reset();
  395. *this = uri;
  396. }
  397. URI::URI(const URI &uri)
  398. : authority(uri.authority),
  399. path(*this, uri.path),
  400. m_scheme(uri.m_scheme),
  401. m_query(uri.m_query),
  402. m_fragment(uri.m_fragment),
  403. m_schemeDefined(uri.m_schemeDefined),
  404. m_queryDefined(uri.m_queryDefined),
  405. m_fragmentDefined(uri.m_fragmentDefined)
  406. {}
  407. #ifdef MSVC
  408. #pragma warning(pop)
  409. #endif
  410. URI&
  411. URI::operator=(const std::string& uri)
  412. {
  413. reset();
  414. URIParser parser(*this);
  415. parser.run(uri);
  416. if (parser.error() || !parser.final())
  417. MORDOR_THROW_EXCEPTION(std::invalid_argument("uri"));
  418. return *this;
  419. }
  420. URI&
  421. URI::operator=(const Buffer &uri)
  422. {
  423. reset();
  424. URIParser parser(*this);
  425. parser.run(uri);
  426. if (parser.error() || !parser.final())
  427. MORDOR_THROW_EXCEPTION(std::invalid_argument("uri"));
  428. return *this;
  429. }
  430. void
  431. URI::reset()
  432. {
  433. schemeDefined(false);
  434. authority.hostDefined(false);
  435. path.segments.clear();
  436. queryDefined(false);
  437. fragmentDefined(false);
  438. }
  439. URI::Authority::Authority()
  440. {
  441. userinfoDefined(false);
  442. hostDefined(false);
  443. portDefined(false);
  444. }
  445. URI::Authority::Authority(const char *authority)
  446. {
  447. userinfoDefined(false);
  448. hostDefined(false);
  449. portDefined(false);
  450. *this = authority;
  451. }
  452. URI::Authority::Authority(const std::string& authority)
  453. {
  454. userinfoDefined(false);
  455. hostDefined(false);
  456. portDefined(false);
  457. *this = authority;
  458. }
  459. URI::Authority&
  460. URI::Authority::operator=(const std::string& authority)
  461. {
  462. URIAuthorityParser parser(*this);
  463. parser.run(authority);
  464. if (parser.error() || !parser.final())
  465. MORDOR_THROW_EXCEPTION(std::invalid_argument("authority"));
  466. return *this;
  467. }
  468. void
  469. URI::Authority::normalize(const std::string& defaultHost, bool emptyHostValid,
  470. int defaultPort, bool emptyPortValid)
  471. {
  472. for(size_t i = 0; i < m_host.length(); ++i)
  473. m_host[i] = tolower(m_host[i]);
  474. if (m_port == defaultPort)
  475. m_port = -1;
  476. if (m_port == -1 && !emptyPortValid)
  477. m_portDefined = false;
  478. if (m_host == defaultHost)
  479. m_host.clear();
  480. if (m_host.empty() && !emptyHostValid && !m_userinfoDefined && !m_portDefined)
  481. m_hostDefined = false;
  482. }
  483. std::string
  484. URI::Authority::toString() const
  485. {
  486. std::ostringstream os;
  487. os << *this;
  488. return os.str();
  489. }
  490. static int boolcmp(bool lhs, bool rhs)
  491. {
  492. if (!lhs && rhs)
  493. return -1;
  494. if (lhs && !rhs)
  495. return 1;
  496. return 0;
  497. }
  498. int
  499. URI::Authority::cmp(const Authority &rhs) const
  500. {
  501. int x = boolcmp(m_hostDefined, rhs.m_hostDefined);
  502. if (x != 0) return x;
  503. x = strcmp(m_host.c_str(), rhs.m_host.c_str());
  504. if (x != 0) return x;
  505. x = boolcmp(m_portDefined, rhs.m_portDefined);
  506. if (x != 0) return x;
  507. x = m_port - rhs.m_port;
  508. if (x != 0) return x;
  509. x = boolcmp(m_userinfoDefined, rhs.m_userinfoDefined);
  510. if (x != 0) return x;
  511. return strcmp(m_userinfo.c_str(), rhs.m_userinfo.c_str());
  512. }
  513. std::ostream&
  514. operator<<(std::ostream& os, const URI::Authority& authority)
  515. {
  516. MORDOR_ASSERT(authority.hostDefined());
  517. if (authority.userinfoDefined()) {
  518. os << escape(authority.userinfo(), userinfo) << "@";
  519. }
  520. os << escape(authority.host(), host);
  521. if (authority.portDefined()) {
  522. os << ":";
  523. if (authority.port() > 0) {
  524. os << authority.port();
  525. }
  526. }
  527. return os;
  528. }
  529. URI::Path::Path(const URI &uri)
  530. : m_uri(&uri)
  531. {}
  532. URI::Path::Path(const URI &uri, const Path &path)
  533. : segments(path.segments),
  534. m_uri(&uri)
  535. {}
  536. URI::Path::Path()
  537. : m_uri(NULL)
  538. {}
  539. URI::Path::Path(const char *path)
  540. : m_uri(NULL)
  541. {
  542. *this = path;
  543. }
  544. URI::Path::Path(const std::string &path)
  545. : m_uri(NULL)
  546. {
  547. *this = path;
  548. }
  549. URI::Path::Path(const Path &path)
  550. : segments(path.segments),
  551. m_uri(NULL)
  552. {
  553. segments = path.segments;
  554. }
  555. URI::Path &
  556. URI::Path::operator=(const std::string &path)
  557. {
  558. std::vector<std::string> result;
  559. URIPathParser parser(result);
  560. parser.run(path);
  561. if (parser.error() || !parser.final())
  562. MORDOR_THROW_EXCEPTION(std::invalid_argument("path"));
  563. segments.swap(result);
  564. return *this;
  565. }
  566. URI::Path &
  567. URI::Path::operator=(const Path &path)
  568. {
  569. segments = path.segments;
  570. // Do not copy m_uri
  571. return *this;
  572. }
  573. void
  574. URI::Path::makeAbsolute()
  575. {
  576. if (segments.empty()) {
  577. segments.push_back(std::string());
  578. segments.push_back(std::string());
  579. } else if (!segments.front().empty()) {
  580. segments.insert(segments.begin(), std::string());
  581. }
  582. }
  583. void
  584. URI::Path::makeRelative()
  585. {
  586. if (!segments.empty() && segments.front().empty()) {
  587. segments.erase(segments.begin());
  588. if (segments.size() == 1u && segments.front().empty())
  589. segments.clear();
  590. }
  591. }
  592. void
  593. URI::Path::append(const std::string &segment)
  594. {
  595. if (m_uri && segments.empty() && m_uri->authority.hostDefined()) {
  596. segments.push_back(std::string());
  597. segments.push_back(segment);
  598. } else if (segments.empty() || !segments[segments.size() - 1].empty() ||
  599. // Special case for degenerate single-empty-segment path
  600. (segments.size() == 1 && segments.front().empty())) {
  601. segments.push_back(segment);
  602. } else {
  603. segments[segments.size() - 1] = segment;
  604. }
  605. }
  606. void
  607. URI::Path::removeDotComponents()
  608. {
  609. for(size_t i = 0; i < segments.size(); ++i) {
  610. if (i == 0 && segments[i].empty())
  611. continue;
  612. if (segments[i] == ".") {
  613. if (i + 1 == segments.size()) {
  614. segments[i].clear();
  615. continue;
  616. } else {
  617. segments.erase(segments.begin() + i);
  618. --i;
  619. continue;
  620. }
  621. }
  622. if (segments[i] == "..") {
  623. if (i == 0) {
  624. segments.erase(segments.begin());
  625. --i;
  626. continue;
  627. }
  628. if (i == 1 && segments.front().empty()) {
  629. segments.erase(segments.begin() + i);
  630. --i;
  631. continue;
  632. }
  633. if (i + 1 == segments.size()) {
  634. segments.resize(segments.size() - 1);
  635. segments.back().clear();
  636. --i;
  637. continue;
  638. }
  639. segments.erase(segments.begin() + i - 1, segments.begin() + i + 1);
  640. i -= 2;
  641. continue;
  642. }
  643. }
  644. }
  645. void
  646. URI::Path::normalize(bool emptyPathValid)
  647. {
  648. removeDotComponents();
  649. }
  650. void
  651. URI::Path::merge(const Path& rhs)
  652. {
  653. MORDOR_ASSERT(rhs.isRelative());
  654. if (!segments.empty()) {
  655. segments.pop_back();
  656. segments.insert(segments.end(), rhs.segments.begin(), rhs.segments.end());
  657. } else {
  658. segments = rhs.segments;
  659. }
  660. }
  661. URI::Path::path_serializer
  662. URI::Path::serialize(bool schemeless) const
  663. {
  664. path_serializer result;
  665. result.p = this;
  666. result.schemeless = schemeless;
  667. return result;
  668. }
  669. std::string
  670. URI::Path::toString() const
  671. {
  672. std::ostringstream os;
  673. os << *this;
  674. return os.str();
  675. }
  676. std::ostream&
  677. operator<<(std::ostream& os, const URI::Path::path_serializer &p)
  678. {
  679. const std::vector<std::string> &segments = p.p->segments;
  680. for (std::vector<std::string>::const_iterator it = segments.begin();
  681. it != segments.end();
  682. ++it) {
  683. if (it != segments.begin())
  684. os << '/';
  685. if (it == segments.begin() && p.schemeless)
  686. os << escape(*it, segment_nc);
  687. else
  688. os << escape(*it, pchar);
  689. }
  690. return os;
  691. }
  692. std::ostream&
  693. operator<<(std::ostream& os, const URI::Path& path)
  694. {
  695. return os << path.serialize();
  696. }
  697. int
  698. URI::Path::cmp(const Path &rhs) const
  699. {
  700. std::vector<std::string>::const_iterator itl, itr;
  701. itl = segments.begin(); itr = rhs.segments.begin();
  702. while (true) {
  703. if (itl == segments.end() && itr != rhs.segments.end())
  704. return -1;
  705. if (itl != segments.end() && itr == rhs.segments.end())
  706. return 1;
  707. if (itl == segments.end() && itr == rhs.segments.end())
  708. return 0;
  709. int x = strcmp(itl->c_str(), itr->c_str());
  710. if (x != 0) return x;
  711. ++itl; ++itr;
  712. }
  713. }
  714. void
  715. URI::normalize()
  716. {
  717. for (size_t i = 0; i < m_scheme.size(); ++i)
  718. m_scheme[i] = tolower(m_scheme[i]);
  719. if (m_scheme == "http" || m_scheme == "https") {
  720. authority.normalize("", false, m_scheme.size() == 4 ? 80 : 443, false);
  721. path.normalize();
  722. } else if (m_scheme == "file") {
  723. authority.normalize("localhost", true);
  724. path.normalize();
  725. } else if (m_scheme == "socks") {
  726. authority.normalize("", false, 1080, false);
  727. path.normalize();
  728. } else {
  729. authority.normalize();
  730. path.normalize();
  731. }
  732. }
  733. std::string
  734. URI::query() const
  735. {
  736. MORDOR_ASSERT(m_queryDefined);
  737. return unescape(m_query);
  738. }
  739. void
  740. URI::query(const std::string &q)
  741. {
  742. m_queryDefined = true;
  743. m_query = escape(q, Mordor::query);
  744. }
  745. std::string
  746. URI::toString() const
  747. {
  748. std::ostringstream os;
  749. os << *this;
  750. return os.str();
  751. }
  752. std::ostream&
  753. operator<<(std::ostream& os, const URI& uri)
  754. {
  755. MORDOR_ASSERT(!uri.authority.hostDefined() || uri.path.isAbsolute() ||
  756. uri.path.isEmpty());
  757. if (uri.schemeDefined())
  758. os << escape(uri.scheme(), scheme) << ":";
  759. if (uri.authority.hostDefined()) {
  760. os << "//" << uri.authority;
  761. // authority is always part of hier_part, which only allows
  762. // path_abempty
  763. MORDOR_ASSERT(uri.path.isAbsolute() || uri.path.isEmpty());
  764. }
  765. // Has scheme, but no authority, must ensure that an absolute path
  766. // doesn't begin with an empty segment (or could be mistaken for authority)
  767. if (uri.schemeDefined() && !uri.authority.hostDefined() &&
  768. uri.path.isAbsolute() &&
  769. uri.path.segments.size() >= 3 && uri.path.segments[1].empty()) {
  770. os << "//";
  771. }
  772. os << uri.path.serialize(!uri.schemeDefined());
  773. if (uri.queryDefined())
  774. os << "?" << uri.m_query;
  775. if (uri.fragmentDefined())
  776. os << "#" << escape(uri.fragment(), query);
  777. return os;
  778. }
  779. URI
  780. URI::transform(const URI& base, const URI& relative)
  781. {
  782. MORDOR_ASSERT(base.schemeDefined());
  783. URI target;
  784. if (relative.schemeDefined()) {
  785. target.scheme(relative.scheme());
  786. target.authority = relative.authority;
  787. target.path = relative.path;
  788. target.path.removeDotComponents();
  789. target.m_query = relative.m_query;
  790. target.m_queryDefined = relative.m_queryDefined;
  791. } else {
  792. if (relative.authority.hostDefined()) {
  793. target.authority = relative.authority;
  794. target.path = relative.path;
  795. target.path.removeDotComponents();
  796. target.m_query = relative.m_query;
  797. target.m_queryDefined = relative.m_queryDefined;
  798. } else {
  799. if (relative.path.isEmpty()) {
  800. target.path = base.path;
  801. if (relative.queryDefined()) {
  802. target.query(relative.query());
  803. } else {
  804. target.m_query = base.m_query;
  805. target.m_queryDefined = base.m_queryDefined;
  806. }
  807. } else {
  808. if (relative.path.isAbsolute()) {
  809. target.path = relative.path;
  810. } else {
  811. if (base.authority.hostDefined() && base.path.isEmpty()) {
  812. target.path.segments.push_back(std::string());
  813. target.path.segments.push_back(std::string());
  814. } else {
  815. target.path = base.path;
  816. }
  817. target.path.merge(relative.path);
  818. }
  819. target.path.removeDotComponents();
  820. target.m_query = relative.m_query;
  821. target.m_queryDefined = relative.m_queryDefined;
  822. }
  823. target.authority = base.authority;
  824. }
  825. target.scheme(base.scheme());
  826. }
  827. target.m_fragment = relative.m_fragment;
  828. target.m_fragmentDefined = relative.m_fragmentDefined;
  829. return target;
  830. }
  831. int
  832. URI::cmp(const URI &rhs) const
  833. {
  834. int x = boolcmp(m_schemeDefined, rhs.m_schemeDefined);
  835. if (x != 0) return x;
  836. x = strcmp(m_scheme.c_str(), rhs.m_scheme.c_str());
  837. if (x != 0) return x;
  838. x = authority.cmp(rhs.authority);
  839. if (x != 0) return x;
  840. x = path.cmp(rhs.path);
  841. if (x != 0) return x;
  842. x = boolcmp(m_queryDefined, rhs.m_queryDefined);
  843. if (x != 0) return x;
  844. x = strcmp(m_query.c_str(), rhs.m_query.c_str());
  845. if (x != 0) return x;
  846. x = boolcmp(m_fragmentDefined, rhs.m_fragmentDefined);
  847. if (x != 0) return x;
  848. return strcmp(m_fragment.c_str(), rhs.m_fragment.c_str());
  849. }
  850. %%{
  851. machine querystring_parser;
  852. action mark { mark = fpc; }
  853. action saveKey {
  854. m_iterator = m_qs.insert(std::make_pair(
  855. unescape(std::string(mark, fpc - mark), true), std::string()));
  856. mark = NULL;
  857. }
  858. action saveValue {
  859. MORDOR_ASSERT(m_iterator != m_qs.end());
  860. if (fpc - mark == 0 && m_iterator->first.empty())
  861. m_qs.erase(m_iterator);
  862. else
  863. m_iterator->second = unescape(std::string(mark, fpc - mark), true);
  864. m_iterator = m_qs.end();
  865. mark = NULL;
  866. }
  867. action saveNoValue {
  868. if (m_iterator != m_qs.end() && m_iterator->first.empty()) {
  869. m_qs.erase(m_iterator);
  870. mark = NULL;
  871. }
  872. }
  873. sub_delims = "!" | "$" | "&" | "'" | "(" | ")" | "*" | "+" | "," | ";";
  874. unreserved = alpha | digit | "-" | "." | "_" | "~";
  875. pct_encoded = "%" xdigit xdigit;
  876. pchar = unreserved | pct_encoded | sub_delims | ":" | "@";
  877. querychar = (pchar | "/" | "?") -- '&' -- ';';
  878. key = querychar*;
  879. value = (querychar | '=')*;
  880. keyValue = key >mark %saveKey ('=' value >mark %saveValue)? %saveNoValue;
  881. main := keyValue? ( ('&' | ';') keyValue? )*;
  882. write data;
  883. }%%
  884. class QueryStringParser : public RagelParser
  885. {
  886. public:
  887. QueryStringParser(URI::QueryString &qs)
  888. : m_qs(qs),
  889. m_iterator(m_qs.end())
  890. {}
  891. void init()
  892. {
  893. RagelParser::init();
  894. %% write init;
  895. }
  896. void exec()
  897. {
  898. #ifdef MSVC
  899. #pragma warning(push)
  900. #pragma warning(disable : 4244)
  901. #endif
  902. %% write exec;
  903. #ifdef MSVC
  904. #pragma warning(pop)
  905. #endif
  906. }
  907. bool complete() const { return false; }
  908. bool final() const
  909. {
  910. return cs >= querystring_parser_first_final;
  911. }
  912. bool error() const
  913. {
  914. return cs == querystring_parser_error;
  915. }
  916. private:
  917. URI::QueryString &m_qs;
  918. URI::QueryString::iterator m_iterator;
  919. };
  920. URI::QueryString &
  921. URI::QueryString::operator =(const std::string &string)
  922. {
  923. clear();
  924. QueryStringParser parser(*this);
  925. parser.run(string);
  926. if (!parser.final() || parser.error())
  927. MORDOR_THROW_EXCEPTION(std::invalid_argument("Invalid QueryString"));
  928. return *this;
  929. }
  930. URI::QueryString &
  931. URI::QueryString::operator =(Stream &stream)
  932. {
  933. clear();
  934. QueryStringParser parser(*this);
  935. parser.run(stream);
  936. if (!parser.final() || parser.error())
  937. MORDOR_THROW_EXCEPTION(std::invalid_argument("Invalid QueryString"));
  938. return *this;
  939. }
  940. std::string
  941. URI::QueryString::toString() const
  942. {
  943. std::ostringstream os;
  944. for (const_iterator it = begin();
  945. it != end();
  946. ++it) {
  947. if (it != begin()) {
  948. os << '&';
  949. }
  950. os << escape(it->first, Mordor::queryString, true);
  951. if (!it->second.empty())
  952. os << '=' << escape(it->second, Mordor::queryString, true);
  953. }
  954. return os.str();
  955. }
  956. std::string &
  957. URI::QueryString::operator[](const std::string &key)
  958. {
  959. std::pair<iterator, iterator> its = equal_range(key);
  960. // Did not exist; create it
  961. if (its.first == its.second)
  962. return insert(std::make_pair(key, std::string()))->second;
  963. // Multiple instances; remove all but the first
  964. iterator next = its.first;
  965. ++next;
  966. erase(next, its.second);
  967. // Left with a single (first) instance; return it
  968. return its.first->second;
  969. }
  970. std::string
  971. URI::QueryString::operator[](const std::string &key) const
  972. {
  973. std::pair<const_iterator, const_iterator> its = equal_range(key);
  974. // Did not exist
  975. if (its.first == its.second)
  976. return std::string();
  977. // Return only the first instance
  978. return its.first->second;
  979. }
  980. }