// url.h (6066B)
1 // Copyright (c) 2017 Cloudflare, Inc. and contributors 2 // Licensed under the MIT License: 3 // 4 // Permission is hereby granted, free of charge, to any person obtaining a copy 5 // of this software and associated documentation files (the "Software"), to deal 6 // in the Software without restriction, including without limitation the rights 7 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 // copies of the Software, and to permit persons to whom the Software is 9 // furnished to do so, subject to the following conditions: 10 // 11 // The above copyright notice and this permission notice shall be included in 12 // all copies or substantial portions of the Software. 13 // 14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 // THE SOFTWARE. 21 22 #pragma once 23 24 #include <kj/string.h> 25 #include <kj/vector.h> 26 #include <inttypes.h> 27 28 namespace kj { 29 30 struct UrlOptions { 31 // A bag of options that you can pass to Url::parse()/tryParse() to customize the parser's 32 // behavior. 33 // 34 // A copy of this options struct will be stored in the parsed Url object, at which point it 35 // controls the behavior of the serializer in Url::toString(). 36 37 bool percentDecode = true; 38 // True if URL components should be automatically percent-decoded during parsing, and 39 // percent-encoded during serialization. 40 41 bool allowEmpty = false; 42 // Whether or not to allow empty path and query components when parsing; otherwise, they are 43 // silently removed. 
In other words, setting this false causes consecutive slashes in the path or 44 // consecutive ampersands in the query to be collapsed into one, whereas if true then they 45 // produce empty components. 46 }; 47 48 struct Url { 49 // Represents a URL (or, more accurately, a URI, but whatever). 50 // 51 // Can be parsed from a string and composed back into a string. 52 53 String scheme; 54 // E.g. "http", "https". 55 56 struct UserInfo { 57 String username; 58 Maybe<String> password; 59 }; 60 61 Maybe<UserInfo> userInfo; 62 // Username / password. 63 64 String host; 65 // Hostname, including port if specified. We choose not to parse out the port because KJ's 66 // network address parsing functions already accept addresses containing port numbers, and 67 // because most web standards don't actually want to separate host and port. 68 69 Vector<String> path; 70 bool hasTrailingSlash = false; 71 // Path, split on '/' characters. Note that the individual components of `path` could contain 72 // '/' characters if they were percent-encoded in the original URL. 73 // 74 // No component of the path is allowed to be "", ".", nor ".."; if such components are present, 75 // toString() will throw. Note that parse() and parseRelative() automatically resolve such 76 // components. 77 78 struct QueryParam { 79 String name; 80 String value; 81 }; 82 Vector<QueryParam> query; 83 // Query, e.g. from "?key=value&key2=value2". If a component of the query contains no '=' sign, 84 // it will be parsed as a key with a null value, and later serialized with no '=' sign if you call 85 // Url::toString(). 86 // 87 // To distinguish between null-valued and empty-valued query parameters, we test whether 88 // QueryParam::value is an allocated or unallocated string. For example: 89 // 90 // QueryParam { kj::str("name"), nullptr } // Null-valued; will not have an '=' sign. 91 // QueryParam { kj::str("name"), kj::str("") } // Empty-valued; WILL have an '=' sign. 
92 93 Maybe<String> fragment; 94 // The stuff after the '#' character (not including the '#' character itself), if present. 95 96 using Options = UrlOptions; 97 Options options; 98 99 // --------------------------------------------------------------------------- 100 101 Url() = default; 102 Url(Url&&) = default; 103 ~Url() noexcept(false); 104 Url& operator=(Url&&) = default; 105 106 inline Url(String&& scheme, Maybe<UserInfo>&& userInfo, String&& host, Vector<String>&& path, 107 bool hasTrailingSlash, Vector<QueryParam>&& query, Maybe<String>&& fragment, 108 UrlOptions options) 109 : scheme(kj::mv(scheme)), userInfo(kj::mv(userInfo)), host(kj::mv(host)), path(kj::mv(path)), 110 hasTrailingSlash(hasTrailingSlash), query(kj::mv(query)), fragment(kj::mv(fragment)), 111 options(options) {} 112 // This constructor makes brace initialization work in C++11 and C++20 -- but is technically not 113 // needed in C++14 nor C++17. Go figure. 114 115 Url clone() const; 116 117 enum Context { 118 REMOTE_HREF, 119 // A link to a remote resource. Requires an authority (hostname) section, hence this will 120 // reject things like "mailto:" and "data:". This is the default context. 121 122 HTTP_PROXY_REQUEST, 123 // The URL to place in the first line of an HTTP proxy request. This includes scheme, host, 124 // path, and query, but omits userInfo (which should be used to construct the Authorization 125 // header) and fragment (which should not be transmitted). 126 127 HTTP_REQUEST 128 // The path to place in the first line of a regular HTTP request. This includes only the path 129 // and query. Scheme, user, host, and fragment are omitted. 130 131 // TODO(someday): Add context(s) that supports things like "mailto:", "data:", "blob:". These 132 // don't have an authority section. 133 }; 134 135 kj::String toString(Context context = REMOTE_HREF) const; 136 // Convert the URL to a string. 
137 138 static Url parse(StringPtr text, Context context = REMOTE_HREF, Options options = {}); 139 static Maybe<Url> tryParse(StringPtr text, Context context = REMOTE_HREF, Options options = {}); 140 // Parse an absolute URL. 141 142 Url parseRelative(StringPtr relative) const; 143 Maybe<Url> tryParseRelative(StringPtr relative) const; 144 // Parse a relative URL string with this URL as the base. 145 }; 146 147 } // namespace kj