PageRenderTime 128ms CodeModel.GetById 80ms app.highlight 9ms RepoModel.GetById 31ms app.codeStats 0ms

/mordor/uri.rl

http://github.com/mozy/mordor
Unknown | 1098 lines | 980 code | 118 blank | 0 comment | 0 complexity | 3bc095e486c515bba2d2b59b27e264e4 MD5 | raw file
   1// Copyright (c) 2009 - Mozy, Inc.
   2/* To compile to .cpp:
   3   ragel uri.rl -G2 -o uri.cpp
   4*/
   5
   6#include "mordor/pch.h"
   7
   8#include "mordor/uri.h"
   9
  10#include <sstream>
  11
  12#include "mordor/ragel.h"
  13#include "mordor/string.h"
  14#include "mordor/version.h"
  15
  16namespace Mordor {
  17
  18static const std::string unreserved("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-._~");
  19static const std::string sub_delims("!$&'()*+,;=");
  20static const std::string scheme("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+-.");
  21static const std::string userinfo("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-._~" "!$&'()*+,;=" ":");
  22static const std::string host("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-._~" "!$&'()*+,;=" ":");
  23static const std::string pchar("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-._~" "!$&'()*+,;=" ":@");
  24static const std::string path("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-._~" "!$&'()*+,;=" ":@" "/");
  25static const std::string segment_nc("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-._~" "!$&'()*+,;=" "@");
  26static const std::string query("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-._~" "!$&'()*+,;=" ":@" "/?");
  27static const std::string queryString("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-._~" "!$'()*," ":@" "/?");
  28
  29static std::string escape(const std::string& str, const std::string& allowedChars, bool spaceAsPlus = false)
  30{
  31    const char *hexdigits = "0123456789ABCDEF";
  32    std::string result(str);
  33
  34    const char *c = str.c_str();
  35    const char *end = c + str.length();
  36    bool differed = false;
  37    while(c < end)
  38    {
  39        if (allowedChars.find(*c) == std::string::npos) {
  40            if (!differed) {
  41                result.erase(c - str.c_str());
  42                differed = true;
  43            }
  44            if (*c == ' ' && spaceAsPlus) {
  45                result.append(1, '+');
  46            } else {
  47                result.append(1, '%');
  48                result.append(1, hexdigits[(unsigned char)*c >> 4]);
  49                result.append(1, hexdigits[*c & 0xf]);
  50            }
  51        } else {
  52            if (differed) {
  53                result.append(1, *c);
  54            }
  55        }
  56        ++c;
  57    }
  58
  59    if (differed) {
  60        MORDOR_ASSERT(result.length() >= str.length());
  61    } else {
  62        MORDOR_ASSERT(result == str);
  63    }
  64    return result;
  65}
  66
  67std::string unescape(const std::string& str, bool spaceAsPlus = false)
  68{
  69    std::string result = str;
  70
  71    const char *c = str.c_str();
  72    const char *end = c + str.length();
  73    bool differed = false;
  74    while (c < end)
  75    {
  76        if (*c == '%') {
  77            if (c + 2 >= end)
  78                MORDOR_THROW_EXCEPTION(std::invalid_argument("str"));
  79            if (!differed) {
  80                result.erase(c - str.c_str());
  81                differed = true;
  82            }
  83            char decoded;
  84            ++c;
  85            if (*c >= 'a' && *c <= 'f')
  86                decoded = (*c - 'a' + 10) << 4;
  87            else if (*c >= 'A' && *c <= 'F')
  88                decoded = (*c - 'A' + 10) << 4;
  89            else {
  90                if (*c < '0' || *c > '9')
  91                    MORDOR_THROW_EXCEPTION(std::invalid_argument("str"));
  92                decoded = (*c - '0') << 4;
  93            }
  94            ++c;
  95            if (*c >= 'a' && *c <= 'f')
  96                decoded |= *c - 'a' + 10;
  97            else if (*c >= 'A' && *c <= 'F')
  98                decoded |= *c - 'A' + 10;
  99            else {
 100                if (*c < '0' || *c > '9')
 101                    MORDOR_THROW_EXCEPTION(std::invalid_argument("str"));
 102                decoded |= *c - '0';
 103            }
 104            result.append(1, decoded);
 105        } else if (*c == '+' && spaceAsPlus) {
 106            if (!differed) {
 107                result.erase(c - str.c_str());
 108                differed = true;
 109            }
 110            result.append(1, ' ');
 111        } else if (differed) {
 112            result.append(1, *c);
 113        }
 114        ++c;
 115    }
 116    return result;
 117}
 118
 119std::string
 120URI::encode(const std::string &str, CharacterClass charClass)
 121{
 122    switch (charClass) {
 123        case UNRESERVED:
 124            return escape(str, unreserved, false);
 125        case QUERYSTRING:
 126            return escape(str, Mordor::queryString, true);
 127        default:
 128            MORDOR_NOTREACHED();
 129    }
 130}
 131
 132std::string
 133URI::decode(const std::string &str, CharacterClass charClass)
 134{
 135    switch (charClass) {
 136        case UNRESERVED:
 137            return unescape(str, false);
 138        case QUERYSTRING:
 139            return unescape(str, true);
 140        default:
 141            MORDOR_NOTREACHED();
 142    }
 143}
 144
 145%%{
 146    # See RFC 3986: http://www.ietf.org/rfc/rfc3986.txt
 147
 148    machine uri_parser;
 149
 150    gen_delims = ":" | "/" | "?" | "#" | "[" | "]" | "@";
 151    sub_delims = "!" | "$" | "&" | "'" | "(" | ")" | "*" | "+" | "," | ";" | "=";
 152    reserved = gen_delims | sub_delims;
 153    unreserved = alpha | digit | "-" | "." | "_" | "~";
 154    pct_encoded = "%" xdigit xdigit;
 155
 156    action marku { mark = fpc; }
 157    action markh { mark = fpc; }
 158    action save_scheme
 159    {
 160        m_uri->scheme(unescape(std::string(mark, fpc - mark)));
 161        mark = NULL;
 162    }
 163
 164    scheme = (alpha (alpha | digit | "+" | "-" | ".")*) >marku %save_scheme;
 165
 166    action save_port
 167    {
 168        if (fpc == mark)
 169            m_authority->port(-1);
 170        else
 171            m_authority->port(atoi(mark));
 172        mark = NULL;
 173    }
 174    action save_userinfo
 175    {
 176        m_authority->userinfo(unescape(std::string(mark, fpc - mark)));
 177        mark = NULL;
 178    }
 179    action save_host
 180    {
 181        if (mark != NULL) {
 182            m_authority->host(unescape(std::string(mark, fpc - mark)));
 183            mark = NULL;
 184        }
 185    }
 186
 187    userinfo = (unreserved | pct_encoded | sub_delims | ":")*;
 188    dec_octet = digit | [1-9] digit | "1" digit{2} | 2 [0-4] digit | "25" [0-5];
 189    IPv4address = dec_octet "." dec_octet "." dec_octet "." dec_octet;
 190    h16 = xdigit{1,4};
 191    ls32 = (h16 ":" h16) | IPv4address;
 192    IPv6address = (                         (h16 ":"){6} ls32) |
 193                  (                    "::" (h16 ":"){5} ls32) |
 194                  ((             h16)? "::" (h16 ":"){4} ls32) |
 195                  (((h16 ":"){1} h16)? "::" (h16 ":"){3} ls32) |
 196                  (((h16 ":"){2} h16)? "::" (h16 ":"){2} ls32) |
 197                  (((h16 ":"){3} h16)? "::" (h16 ":"){1} ls32) |
 198                  (((h16 ":"){4} h16)? "::"              ls32) |
 199                  (((h16 ":"){5} h16)? "::"              h16 ) |
 200                  (((h16 ":"){6} h16)? "::"                  );
 201    IPvFuture = "v" xdigit+ "." (unreserved | sub_delims | ":")+;
 202    IP_literal = "[" (IPv6address | IPvFuture) "]";
 203    reg_name = (unreserved | pct_encoded | sub_delims)*;
 204    host = IP_literal | IPv4address | reg_name;
 205    port = digit*;
 206
 207    authority = ( (userinfo %save_userinfo "@")? host >markh %save_host (":" port >markh %save_port)? ) >markh;
 208
 209    action save_segment
 210    {
 211        m_segments->push_back(unescape(std::string(mark, fpc - mark)));
 212        mark = NULL;
 213    }
 214
 215    pchar = unreserved | pct_encoded | sub_delims | ":" | "@";
 216    segment = pchar* >marku %save_segment;
 217    segment_nz = pchar+ >marku %save_segment;
 218    segment_nz_nc = (pchar - ":")+ >marku %save_segment;
 219
 220    action clear_segments
 221    {
 222        m_segments->clear();
 223    }
 224
 225    path_abempty = (("/" >marku >save_segment segment) %marku %save_segment)? ("/" segment)*;
 226    path_absolute = ("/" >marku >save_segment (segment_nz ("/" segment)*)?) %marku %save_segment;
 227    path_noscheme = segment_nz_nc ("/" segment)*;
 228    path_rootless = segment_nz ("/" segment)*;
 229    path_empty = "";
 230    path = (path_abempty | path_absolute | path_noscheme | path_rootless | path_empty);
 231
 232    action save_query
 233    {
 234        m_uri->m_query = std::string(mark, fpc - mark);
 235        m_uri->m_queryDefined = true;
 236        mark = NULL;
 237    }
 238    action save_fragment
 239    {
 240        m_uri->fragment(unescape(std::string(mark, fpc - mark)));
 241        mark = NULL;
 242    }
 243
 244    query = (pchar | "/" | "?")* >marku %save_query;
 245    fragment = (pchar | "/" | "?")* >marku %save_fragment;
 246
 247    hier_part = ("//" %clear_segments authority path_abempty) | path_absolute | path_rootless | path_empty;
 248
 249    relative_part = ("//" %clear_segments authority path_abempty) | path_absolute | path_noscheme | path_empty;
 250    relative_ref = relative_part ( "?" query )? ( "#" fragment )?;
 251
 252    absolute_URI = scheme ":" hier_part ( "?" query )? ;
 253    # Obsolete, but referenced from HTTP, so we translate
 254    relative_URI = relative_part ( "?" query )?;
 255
 256    URI = scheme ":" hier_part ( "?" query )? ( "#" fragment )?;
 257    URI_reference = URI | relative_ref;
 258}%%
 259
 260%%{
 261        machine uri_parser_proper;
 262        include uri_parser;
 263        main := URI_reference;
 264        write data;
 265}%%
 266
 267class URIParser : public RagelParser
 268{
 269public:
 270    URIParser(URI& uri)
 271    {
 272        m_uri = &uri;
 273        m_segments = &m_uri->path.segments;
 274        m_authority = &m_uri->authority;
 275    }
 276
 277    void init()
 278    {
 279        RagelParser::init();
 280        %% write init;
 281    }
 282
 283protected:
 284    void exec()
 285    {
 286#ifdef MSVC
 287#pragma warning(push)
 288#pragma warning(disable : 4244)
 289#endif
 290        %% write exec;
 291#ifdef MSVC
 292#pragma warning(pop)
 293#endif
 294    }
 295
 296public:
 297    bool complete() const
 298    {
 299        return false;
 300    }
 301
 302    bool final() const
 303    {
 304        return cs >= uri_parser_proper_first_final;
 305    }
 306
 307    bool error() const
 308    {
 309        return cs == uri_parser_proper_error;
 310    }
 311
 312private:
 313    URI *m_uri;
 314    std::vector<std::string> *m_segments;
 315    URI::Authority *m_authority;
 316};
 317
 318%%{
 319    machine uri_path_parser;
 320    include uri_parser;
 321    main := path;
 322    write data;
 323}%%
 324class URIPathParser : public RagelParser
 325{
 326public:
 327    URIPathParser(std::vector<std::string> &segments)
 328    {
 329        m_segments = &segments;
 330    }
 331
 332    void init()
 333    {
 334        RagelParser::init();
 335        %% write init;
 336    }
 337
 338protected:
 339    void exec()
 340    {
 341#ifdef MSVC
 342#pragma warning(push)
 343#pragma warning(disable : 4244)
 344#endif
 345        %% write exec;
 346#ifdef MSVC
 347#pragma warning(pop)
 348#endif
 349    }
 350
 351public:
 352    bool complete() const
 353    {
 354        return false;
 355    }
 356
 357    bool final() const
 358    {
 359        return cs >= uri_path_parser_first_final;
 360    }
 361
 362    bool error() const
 363    {
 364        return cs == uri_path_parser_error;
 365    }
 366
 367private:
 368    std::vector<std::string> *m_segments;
 369};
 370
 371%%{
 372    machine uri_authority_parser;
 373    include uri_parser;
 374    main := authority;
 375    write data;
 376}%%
 377class URIAuthorityParser : public RagelParser
 378{
 379public:
 380    URIAuthorityParser(URI::Authority &authority)
 381    {
 382        m_authority = &authority;
 383    }
 384
 385    void init()
 386    {
 387        RagelParser::init();
 388        %% write init;
 389    }
 390
 391protected:
 392    void exec()
 393    {
 394#ifdef MSVC
 395#pragma warning(push)
 396#pragma warning(disable : 4244)
 397#endif
 398        %% write exec;
 399#ifdef MSVC
 400#pragma warning(pop)
 401#endif
 402    }
 403
 404public:
 405    bool complete() const
 406    {
 407        return false;
 408    }
 409
 410    bool final() const
 411    {
 412        return cs >= uri_authority_parser_first_final;
 413    }
 414
 415    bool error() const
 416    {
 417        return cs == uri_authority_parser_error;
 418    }
 419
 420private:
 421    URI::Authority *m_authority;
 422};
 423
 424#ifdef MSVC
 425#pragma warning(push)
 426#pragma warning(disable: 4355)
 427#endif
 428URI::URI()
 429    : path(*this)
 430{
 431    reset();
 432}
 433
 434URI::URI(const std::string& uri)
 435    : path(*this)
 436{
 437    reset();
 438    *this = uri;
 439}
 440
 441URI::URI(const char *uri)
 442    : path(*this)
 443{
 444    reset();
 445    *this = uri;
 446}
 447
 448URI::URI(const Buffer &uri)
 449    : path(*this)
 450{
 451    reset();
 452    *this = uri;
 453}
 454
 455URI::URI(const URI &uri)
 456    : authority(uri.authority),
 457      path(*this, uri.path),
 458      m_scheme(uri.m_scheme),
 459      m_query(uri.m_query),
 460      m_fragment(uri.m_fragment),
 461      m_schemeDefined(uri.m_schemeDefined),
 462      m_queryDefined(uri.m_queryDefined),
 463      m_fragmentDefined(uri.m_fragmentDefined)
 464{}
 465#ifdef MSVC
 466#pragma warning(pop)
 467#endif
 468
 469URI&
 470URI::operator=(const std::string& uri)
 471{
 472    reset();
 473    URIParser parser(*this);
 474    parser.run(uri);
 475    if (parser.error() || !parser.final())
 476        MORDOR_THROW_EXCEPTION(std::invalid_argument("uri"));
 477    return *this;
 478}
 479
 480URI&
 481URI::operator=(const Buffer &uri)
 482{
 483    reset();
 484    URIParser parser(*this);
 485    parser.run(uri);
 486    if (parser.error() || !parser.final())
 487        MORDOR_THROW_EXCEPTION(std::invalid_argument("uri"));
 488    return *this;
 489}
 490
 491void
 492URI::reset()
 493{
 494    schemeDefined(false);
 495    authority.hostDefined(false);
 496    path.segments.clear();
 497    queryDefined(false);
 498    fragmentDefined(false);
 499}
 500
 501URI::Authority::Authority()
 502{
 503    userinfoDefined(false);
 504    hostDefined(false);
 505    portDefined(false);
 506}
 507
 508URI::Authority::Authority(const char *authority)
 509{
 510    userinfoDefined(false);
 511    hostDefined(false);
 512    portDefined(false);
 513    *this = authority;
 514}
 515
 516URI::Authority::Authority(const std::string& authority)
 517{
 518    userinfoDefined(false);
 519    hostDefined(false);
 520    portDefined(false);
 521    *this = authority;
 522}
 523
 524URI::Authority&
 525URI::Authority::operator=(const std::string& authority)
 526{
 527    URIAuthorityParser parser(*this);
 528    parser.run(authority);
 529    if (parser.error() || !parser.final())
 530        MORDOR_THROW_EXCEPTION(std::invalid_argument("authority"));
 531    return *this;
 532}
 533
 534void
 535URI::Authority::normalize(const std::string& defaultHost, bool emptyHostValid,
 536    int defaultPort, bool emptyPortValid)
 537{
 538    for(size_t i = 0; i < m_host.length(); ++i)
 539        m_host[i] = tolower(m_host[i]);
 540    if (m_port == defaultPort)
 541        m_port = -1;
 542    if (m_port == -1 && !emptyPortValid)
 543        m_portDefined = false;
 544    if (m_host == defaultHost)
 545        m_host.clear();
 546    if (m_host.empty() && !emptyHostValid && !m_userinfoDefined && !m_portDefined)
 547        m_hostDefined = false;
 548}
 549
 550std::string
 551URI::Authority::toString() const
 552{
 553    std::ostringstream os;
 554    os << *this;
 555    return os.str();
 556}
 557
 558static int boolcmp(bool lhs, bool rhs)
 559{
 560    if (!lhs && rhs)
 561        return -1;
 562    if (lhs && !rhs)
 563        return 1;
 564    return 0;
 565}
 566
 567int
 568URI::Authority::cmp(const Authority &rhs) const
 569{
 570    int x = boolcmp(m_hostDefined, rhs.m_hostDefined);
 571    if (x != 0) return x;
 572    x = strcmp(m_host.c_str(), rhs.m_host.c_str());
 573    if (x != 0) return x;
 574    x = boolcmp(m_portDefined, rhs.m_portDefined);
 575    if (x != 0) return x;
 576    x = m_port - rhs.m_port;
 577    if (x != 0) return x;
 578    x = boolcmp(m_userinfoDefined, rhs.m_userinfoDefined);
 579    if (x != 0) return x;
 580    return strcmp(m_userinfo.c_str(), rhs.m_userinfo.c_str());
 581}
 582
 583std::ostream&
 584operator<<(std::ostream& os, const URI::Authority& authority)
 585{
 586    MORDOR_ASSERT(authority.hostDefined());
 587    if (authority.userinfoDefined()) {
 588        os << escape(authority.userinfo(), userinfo) << "@";
 589    }
 590    os << escape(authority.host(), host);
 591    if (authority.portDefined()) {
 592        os << ":";
 593        if (authority.port() > 0) {
 594            os << authority.port();
 595        }
 596    }
 597    return os;
 598}
 599
 600URI::Path::Path(const URI &uri)
 601    : m_uri(&uri)
 602{}
 603
 604URI::Path::Path(const URI &uri, const Path &path)
 605    : segments(path.segments),
 606      m_uri(&uri)
 607{}
 608
 609URI::Path::Path()
 610    : m_uri(NULL)
 611 {}
 612
 613URI::Path::Path(const char *path)
 614    : m_uri(NULL)
 615{
 616    *this = path;
 617}
 618
 619URI::Path::Path(const std::string &path)
 620    : m_uri(NULL)
 621{
 622    *this = path;
 623}
 624
 625URI::Path::Path(const Path &path)
 626    : segments(path.segments),
 627      m_uri(NULL)
 628{
 629    segments = path.segments;
 630}
 631
 632URI::Path &
 633URI::Path::operator=(const std::string &path)
 634{
 635    std::vector<std::string> result;
 636    URIPathParser parser(result);
 637    parser.run(path);
 638    if (parser.error() || !parser.final())
 639        MORDOR_THROW_EXCEPTION(std::invalid_argument("path"));
 640    segments.swap(result);
 641    return *this;
 642}
 643
 644URI::Path &
 645URI::Path::operator=(const Path &path)
 646{
 647    segments = path.segments;
 648    // Do not copy m_uri
 649    return *this;
 650}
 651
 652void
 653URI::Path::makeAbsolute()
 654{
 655    if (segments.empty()) {
 656        segments.push_back(std::string());
 657        segments.push_back(std::string());
 658    } else if (!segments.front().empty()) {
 659        segments.insert(segments.begin(), std::string());
 660    }
 661}
 662
 663void
 664URI::Path::makeRelative()
 665{
 666    if (!segments.empty() && segments.front().empty()) {
 667        segments.erase(segments.begin());
 668        if (segments.size() == 1u && segments.front().empty())
 669            segments.clear();
 670    }
 671}
 672
 673void
 674URI::Path::append(const std::string &segment)
 675{
 676    if (m_uri && segments.empty() && m_uri->authority.hostDefined()) {
 677        segments.push_back(std::string());
 678        segments.push_back(segment);
 679    } else if (segments.empty() || !segments[segments.size() - 1].empty() ||
 680        // Special case for degenerate single-empty-segment path
 681        (segments.size() == 1 && segments.front().empty())) {
 682        segments.push_back(segment);
 683    } else {
 684        segments[segments.size() - 1] = segment;
 685    }
 686}
 687
 688void
 689URI::Path::removeDotComponents()
 690{
 691    for(size_t i = 0; i < segments.size(); ++i) {
 692        if (i == 0 && segments[i].empty())
 693            continue;
 694        if (segments[i] == ".") {
 695            if (i + 1 == segments.size()) {
 696                segments[i].clear();
 697                continue;
 698            } else {
 699                segments.erase(segments.begin() + i);
 700                --i;
 701                continue;
 702            }
 703        }
 704        if (segments[i] == "..") {
 705            if (i == 0) {
 706                segments.erase(segments.begin());
 707                --i;
 708                continue;
 709            }
 710            if (i == 1 && segments.front().empty()) {
 711                segments.erase(segments.begin() + i);
 712                --i;
 713                continue;
 714            }
 715            if (i + 1 == segments.size()) {
 716                segments.resize(segments.size() - 1);
 717                segments.back().clear();
 718                --i;
 719                continue;
 720            }
 721            segments.erase(segments.begin() + i - 1, segments.begin() + i + 1);
 722            i -= 2;
 723            continue;
 724        }
 725    }
 726}
 727
 728void
 729URI::Path::normalize(bool emptyPathValid)
 730{
 731    removeDotComponents();
 732}
 733
 734void
 735URI::Path::merge(const Path& rhs)
 736{
 737    MORDOR_ASSERT(rhs.isRelative());
 738    if (!segments.empty()) {
 739        segments.pop_back();
 740        segments.insert(segments.end(), rhs.segments.begin(), rhs.segments.end());
 741    } else {
 742        segments = rhs.segments;
 743    }
 744}
 745
 746URI::Path::path_serializer
 747URI::Path::serialize(bool schemeless) const
 748{
 749    path_serializer result;
 750    result.p = this;
 751    result.schemeless = schemeless;
 752    return result;
 753}
 754
 755std::string
 756URI::Path::toString() const
 757{
 758    std::ostringstream os;
 759    os << *this;
 760    return os.str();
 761}
 762
 763std::ostream&
 764operator<<(std::ostream& os, const URI::Path::path_serializer &p)
 765{
 766    const std::vector<std::string> &segments = p.p->segments;
 767    for (std::vector<std::string>::const_iterator it = segments.begin();
 768        it != segments.end();
 769        ++it) {
 770        if (it != segments.begin())
 771            os << '/';
 772        if (it == segments.begin() && p.schemeless)
 773            os << escape(*it, segment_nc);
 774        else
 775            os << escape(*it, pchar);
 776    }
 777    return os;
 778}
 779
 780std::ostream&
 781operator<<(std::ostream& os, const URI::Path& path)
 782{
 783    return os << path.serialize();
 784}
 785
 786int
 787URI::Path::cmp(const Path &rhs) const
 788{
 789    std::vector<std::string>::const_iterator itl, itr;
 790    itl = segments.begin(); itr = rhs.segments.begin();
 791    while (true) {
 792        if (itl == segments.end() && itr != rhs.segments.end())
 793            return -1;
 794        if (itl != segments.end() && itr == rhs.segments.end())
 795            return 1;
 796        if (itl == segments.end() && itr == rhs.segments.end())
 797            return 0;
 798        int x = strcmp(itl->c_str(), itr->c_str());
 799        if (x != 0) return x;
 800        ++itl; ++itr;
 801    }
 802}
 803
 804void
 805URI::normalize()
 806{
 807    for (size_t i = 0; i < m_scheme.size(); ++i)
 808        m_scheme[i] = tolower(m_scheme[i]);
 809
 810    if (m_scheme == "http" || m_scheme == "https") {
 811        authority.normalize("", false, m_scheme.size() == 4 ? 80 : 443, false);
 812        path.normalize();
 813    } else if (m_scheme == "file") {
 814        authority.normalize("localhost", true);
 815        path.normalize();
 816    } else if (m_scheme == "socks") {
 817        authority.normalize("", false, 1080, false);
 818        path.normalize();
 819    } else {
 820        authority.normalize();
 821        path.normalize();
 822    }
 823}
 824
 825std::string
 826URI::query() const
 827{
 828    MORDOR_ASSERT(m_queryDefined);
 829    return unescape(m_query);
 830}
 831
 832void
 833URI::query(const std::string &q)
 834{
 835    m_queryDefined = true;
 836    m_query = escape(q, Mordor::query);
 837}
 838
 839std::string
 840URI::toString() const
 841{
 842    std::ostringstream os;
 843    os << *this;
 844    return os.str();
 845}
 846
 847std::ostream&
 848operator<<(std::ostream& os, const URI& uri)
 849{
 850    MORDOR_ASSERT(!uri.authority.hostDefined() || uri.path.isAbsolute() ||
 851        uri.path.isEmpty());
 852    if (uri.schemeDefined())
 853        os << escape(uri.scheme(), scheme) << ":";
 854
 855    if (uri.authority.hostDefined()) {
 856        os << "//" << uri.authority;
 857        // authority is always part of hier_part, which only allows
 858        // path_abempty
 859        MORDOR_ASSERT(uri.path.isAbsolute() || uri.path.isEmpty());
 860    }
 861
 862    // Has scheme, but no authority, must ensure that an absolute path
 863    // doesn't begin with an empty segment (or could be mistaken for authority)
 864    if (uri.schemeDefined() && !uri.authority.hostDefined() &&
 865        uri.path.isAbsolute() &&
 866        uri.path.segments.size() >= 3 && uri.path.segments[1].empty()) {
 867        os << "//";
 868    }
 869    os << uri.path.serialize(!uri.schemeDefined());
 870
 871    if (uri.queryDefined())
 872        os << "?" << uri.m_query;
 873
 874    if (uri.fragmentDefined())
 875        os << "#" << escape(uri.fragment(), query);
 876    return os;
 877}
 878
 879URI
 880URI::transform(const URI& base, const URI& relative)
 881{
 882    MORDOR_ASSERT(base.schemeDefined());
 883
 884    URI target;
 885    if (relative.schemeDefined()) {
 886        target.scheme(relative.scheme());
 887        target.authority = relative.authority;
 888        target.path = relative.path;
 889        target.path.removeDotComponents();
 890        target.m_query = relative.m_query;
 891        target.m_queryDefined = relative.m_queryDefined;
 892    } else {
 893        if (relative.authority.hostDefined()) {
 894            target.authority = relative.authority;
 895            target.path = relative.path;
 896            target.path.removeDotComponents();
 897            target.m_query = relative.m_query;
 898            target.m_queryDefined = relative.m_queryDefined;
 899        } else {
 900            if (relative.path.isEmpty()) {
 901                target.path = base.path;
 902                if (relative.queryDefined()) {
 903                    target.query(relative.query());
 904                } else {
 905                    target.m_query = base.m_query;
 906                    target.m_queryDefined = base.m_queryDefined;
 907                }
 908            } else {
 909                if (relative.path.isAbsolute()) {
 910                    target.path = relative.path;
 911                } else {
 912                    if (base.authority.hostDefined() && base.path.isEmpty()) {
 913                        target.path.segments.push_back(std::string());
 914                        target.path.segments.push_back(std::string());
 915                    } else {
 916                        target.path = base.path;
 917                    }
 918                    target.path.merge(relative.path);
 919                }
 920                target.path.removeDotComponents();
 921                target.m_query = relative.m_query;
 922                target.m_queryDefined = relative.m_queryDefined;
 923            }
 924            target.authority = base.authority;
 925        }
 926        target.scheme(base.scheme());
 927    }
 928    target.m_fragment = relative.m_fragment;
 929    target.m_fragmentDefined = relative.m_fragmentDefined;
 930    return target;
 931}
 932
 933int
 934URI::cmp(const URI &rhs) const
 935{
 936    int x = boolcmp(m_schemeDefined, rhs.m_schemeDefined);
 937    if (x != 0) return x;
 938    x = strcmp(m_scheme.c_str(), rhs.m_scheme.c_str());
 939    if (x != 0) return x;
 940    x = authority.cmp(rhs.authority);
 941    if (x != 0) return x;
 942    x = path.cmp(rhs.path);
 943    if (x != 0) return x;
 944    x = boolcmp(m_queryDefined, rhs.m_queryDefined);
 945    if (x != 0) return x;
 946    x = strcmp(m_query.c_str(), rhs.m_query.c_str());
 947    if (x != 0) return x;
 948    x = boolcmp(m_fragmentDefined, rhs.m_fragmentDefined);
 949    if (x != 0) return x;
 950    return strcmp(m_fragment.c_str(), rhs.m_fragment.c_str());
 951}
 952
 953%%{
 954    machine querystring_parser;
 955
 956    action mark { mark = fpc; }
 957    action saveKey {
 958        m_iterator = m_qs.insert(std::make_pair(
 959            unescape(std::string(mark, fpc - mark), true), std::string()));
 960        mark = NULL;
 961    }
 962    action saveValue {
 963        MORDOR_ASSERT(m_iterator != m_qs.end());
 964        if (fpc - mark == 0 && m_iterator->first.empty())
 965            m_qs.erase(m_iterator);
 966        else
 967            m_iterator->second = unescape(std::string(mark, fpc - mark), true);
 968        m_iterator = m_qs.end();
 969        mark = NULL;
 970    }
 971    action saveNoValue {
 972        if (m_iterator != m_qs.end() && m_iterator->first.empty()) {
 973            m_qs.erase(m_iterator);
 974            mark = NULL;
 975        }
 976    }
 977
 978    sub_delims = "!" | "$" | "&" | "'" | "(" | ")" | "*" | "+" | "," | ";";
 979    unreserved = alpha | digit | "-" | "." | "_" | "~";
 980    pct_encoded = "%" xdigit xdigit;
 981    pchar = unreserved | pct_encoded | sub_delims | ":" | "@";
 982    querychar = (pchar | "/" | "?") -- '&' -- ';';
 983    key = querychar*;
 984    value = (querychar | '=')*;
 985    keyValue = key >mark %saveKey ('=' value >mark %saveValue)? %saveNoValue;
 986    main := keyValue? ( ('&' | ';') keyValue? )*;
 987    write data;
 988}%%
 989
 990class QueryStringParser : public RagelParser
 991{
 992public:
 993    QueryStringParser(URI::QueryString &qs)
 994    : m_qs(qs),
 995      m_iterator(m_qs.end())
 996    {}
 997
 998
 999    void init()
1000    {
1001        RagelParser::init();
1002        %% write init;
1003    }
1004
1005    void exec()
1006    {
1007#ifdef MSVC
1008#pragma warning(push)
1009#pragma warning(disable : 4244)
1010#endif
1011        %% write exec;
1012#ifdef MSVC
1013#pragma warning(pop)
1014#endif
1015    }
1016
1017    bool complete() const { return false; }
1018    bool final() const
1019    {
1020        return cs >= querystring_parser_first_final;
1021    }
1022
1023    bool error() const
1024    {
1025        return cs == querystring_parser_error;
1026    }
1027
1028private:
1029    URI::QueryString &m_qs;
1030    URI::QueryString::iterator m_iterator;
1031};
1032
1033URI::QueryString &
1034URI::QueryString::operator =(const std::string &string)
1035{
1036    clear();
1037    QueryStringParser parser(*this);
1038    parser.run(string);
1039    if (!parser.final() || parser.error())
1040        MORDOR_THROW_EXCEPTION(std::invalid_argument("Invalid QueryString"));
1041    return *this;
1042}
1043
1044URI::QueryString &
1045URI::QueryString::operator =(Stream &stream)
1046{
1047    clear();
1048    QueryStringParser parser(*this);
1049    parser.run(stream);
1050    if (!parser.final() || parser.error())
1051        MORDOR_THROW_EXCEPTION(std::invalid_argument("Invalid QueryString"));
1052    return *this;
1053}
1054
1055std::string
1056URI::QueryString::toString() const
1057{
1058    std::ostringstream os;
1059    for (const_iterator it = begin();
1060        it != end();
1061        ++it) {
1062        if (it != begin()) {
1063            os << '&';
1064        }
1065        os << escape(it->first, Mordor::queryString, true);
1066        if (!it->second.empty())
1067            os << '=' << escape(it->second, Mordor::queryString, true);
1068    }
1069    return os.str();
1070}
1071
1072std::string &
1073URI::QueryString::operator[](const std::string &key)
1074{
1075    std::pair<iterator, iterator> its = equal_range(key);
1076    // Did not exist; create it
1077    if (its.first == its.second)
1078        return insert(std::make_pair(key, std::string()))->second;
1079    // Multiple instances; remove all but the first
1080    iterator next = its.first;
1081    ++next;
1082    erase(next, its.second);
1083    // Left with a single (first) instance; return it
1084    return its.first->second;
1085}
1086
1087std::string
1088URI::QueryString::operator[](const std::string &key) const
1089{
1090    std::pair<const_iterator, const_iterator> its = equal_range(key);
1091    // Did not exist
1092    if (its.first == its.second)
1093        return std::string();
1094    // Return only the first instance
1095    return its.first->second;
1096}
1097
1098}