capnproto

FORK: Cap'n Proto serialization/RPC system - core tools and C++ library
git clone https://git.neptards.moe/neptards/capnproto.git
Log | Files | Refs | README | LICENSE

url.h (6066B)


      1 // Copyright (c) 2017 Cloudflare, Inc. and contributors
      2 // Licensed under the MIT License:
      3 //
      4 // Permission is hereby granted, free of charge, to any person obtaining a copy
      5 // of this software and associated documentation files (the "Software"), to deal
      6 // in the Software without restriction, including without limitation the rights
      7 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      8 // copies of the Software, and to permit persons to whom the Software is
      9 // furnished to do so, subject to the following conditions:
     10 //
     11 // The above copyright notice and this permission notice shall be included in
     12 // all copies or substantial portions of the Software.
     13 //
     14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     17 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     18 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     19 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     20 // THE SOFTWARE.
     21 
     22 #pragma once
     23 
     24 #include <kj/string.h>
     25 #include <kj/vector.h>
     26 #include <inttypes.h>
     27 
     28 namespace kj {
     29 
     30 struct UrlOptions {
     31   // A bag of options that you can pass to Url::parse()/tryParse() to customize the parser's
     32   // behavior.
     33   //
     34   // A copy of this options struct will be stored in the parsed Url object, at which point it
     35   // controls the behavior of the serializer in Url::toString().
     36 
     37   bool percentDecode = true;
     38   // True if URL components should be automatically percent-decoded during parsing, and
     39   // percent-encoded during serialization.
     40 
     41   bool allowEmpty = false;
     42   // Whether or not to allow empty path and query components when parsing; otherwise, they are
     43   // silently removed. In other words, setting this false causes consecutive slashes in the path or
     44   // consecutive ampersands in the query to be collapsed into one, whereas if true then they
     45   // produce empty components.
     46 };
     47 
     48 struct Url {
     49   // Represents a URL (or, more accurately, a URI, but whatever).
     50   //
     51   // Can be parsed from a string and composed back into a string.
     52 
     53   String scheme;
     54   // E.g. "http", "https".
     55 
     56   struct UserInfo {
     57     String username;
     58     Maybe<String> password;
     59   };
     60 
     61   Maybe<UserInfo> userInfo;
     62   // Username / password.
     63 
     64   String host;
     65   // Hostname, including port if specified. We choose not to parse out the port because KJ's
     66   // network address parsing functions already accept addresses containing port numbers, and
     67   // because most web standards don't actually want to separate host and port.
     68 
     69   Vector<String> path;
     70   bool hasTrailingSlash = false;
     71   // Path, split on '/' characters. Note that the individual components of `path` could contain
     72   // '/' characters if they were percent-encoded in the original URL.
     73   //
     74   // No component of the path is allowed to be "", ".", nor ".."; if such components are present,
     75   // toString() will throw. Note that parse() and parseRelative() automatically resolve such
     76   // components.
     77 
     78   struct QueryParam {
     79     String name;
     80     String value;
     81   };
     82   Vector<QueryParam> query;
     83   // Query, e.g. from "?key=value&key2=value2". If a component of the query contains no '=' sign,
     84   // it will be parsed as a key with a null value, and later serialized with no '=' sign if you call
     85   // Url::toString().
     86   //
     87   // To distinguish between null-valued and empty-valued query parameters, we test whether
     88   // QueryParam::value is an allocated or unallocated string. For example:
     89   //
     90   //     QueryParam { kj::str("name"), nullptr }      // Null-valued; will not have an '=' sign.
     91   //     QueryParam { kj::str("name"), kj::str("") }  // Empty-valued; WILL have an '=' sign.
     92 
     93   Maybe<String> fragment;
     94   // The stuff after the '#' character (not including the '#' character itself), if present.
     95 
     96   using Options = UrlOptions;
     97   Options options;
     98 
     99   // ---------------------------------------------------------------------------
    100 
    101   Url() = default;
    102   Url(Url&&) = default;
    103   ~Url() noexcept(false);
    104   Url& operator=(Url&&) = default;
    105 
    106   inline Url(String&& scheme, Maybe<UserInfo>&& userInfo, String&& host, Vector<String>&& path,
    107              bool hasTrailingSlash, Vector<QueryParam>&& query, Maybe<String>&& fragment,
    108              UrlOptions options)
    109       : scheme(kj::mv(scheme)), userInfo(kj::mv(userInfo)), host(kj::mv(host)), path(kj::mv(path)),
    110         hasTrailingSlash(hasTrailingSlash), query(kj::mv(query)), fragment(kj::mv(fragment)),
    111         options(options) {}
    112   // This constructor makes brace initialization work in C++11 and C++20 -- but is technically not
    113   // needed in C++14 nor C++17. Go figure.
    114 
    115   Url clone() const;
    116 
    117   enum Context {
    118     REMOTE_HREF,
    119     // A link to a remote resource. Requires an authority (hostname) section, hence this will
    120     // reject things like "mailto:" and "data:". This is the default context.
    121 
    122     HTTP_PROXY_REQUEST,
    123     // The URL to place in the first line of an HTTP proxy request. This includes scheme, host,
    124     // path, and query, but omits userInfo (which should be used to construct the Authorization
    125     // header) and fragment (which should not be transmitted).
    126 
    127     HTTP_REQUEST
    128     // The path to place in the first line of a regular HTTP request. This includes only the path
    129     // and query. Scheme, user, host, and fragment are omitted.
    130 
    131     // TODO(someday): Add context(s) that supports things like "mailto:", "data:", "blob:". These
    132     //   don't have an authority section.
    133   };
    134 
    135   kj::String toString(Context context = REMOTE_HREF) const;
    136   // Convert the URL to a string.
    137 
    138   static Url parse(StringPtr text, Context context = REMOTE_HREF, Options options = {});
    139   static Maybe<Url> tryParse(StringPtr text, Context context = REMOTE_HREF, Options options = {});
    140   // Parse an absolute URL.
    141 
    142   Url parseRelative(StringPtr relative) const;
    143   Maybe<Url> tryParseRelative(StringPtr relative) const;
    144   // Parse a relative URL string with this URL as the base.
    145 };
    146 
    147 } // namespace kj