http.h (46752B)
1 // Copyright (c) 2017 Sandstorm Development Group, Inc. and contributors 2 // Licensed under the MIT License: 3 // 4 // Permission is hereby granted, free of charge, to any person obtaining a copy 5 // of this software and associated documentation files (the "Software"), to deal 6 // in the Software without restriction, including without limitation the rights 7 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 // copies of the Software, and to permit persons to whom the Software is 9 // furnished to do so, subject to the following conditions: 10 // 11 // The above copyright notice and this permission notice shall be included in 12 // all copies or substantial portions of the Software. 13 // 14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 // THE SOFTWARE. 21 22 #pragma once 23 // The KJ HTTP client/server library. 24 // 25 // This is a simple library which can be used to implement an HTTP client or server. Properties 26 // of this library include: 27 // - Uses KJ async framework. 28 // - Agnostic to transport layer -- you can provide your own. 29 // - Header parsing is zero-copy -- it results in strings that point directly into the buffer 30 // received off the wire. 31 // - Application code which reads and writes headers refers to headers by symbolic names, not by 32 // string literals, with lookups being array-index-based, not map-based. To make this possible, 33 // the application announces what headers it cares about in advance, in order to assign numeric 34 // values to them. 
// - Methods are identified by an enum.

#include <kj/string.h>
#include <kj/vector.h>
#include <kj/memory.h>
#include <kj/one-of.h>
#include <kj/async-io.h>

namespace kj {

#define KJ_HTTP_FOR_EACH_METHOD(MACRO) \
  MACRO(GET) \
  MACRO(HEAD) \
  MACRO(POST) \
  MACRO(PUT) \
  MACRO(DELETE) \
  MACRO(PATCH) \
  MACRO(PURGE) \
  MACRO(OPTIONS) \
  MACRO(TRACE) \
  /* standard methods */ \
  /* */ \
  /* (CONNECT is intentionally omitted since it should be handled specially in HttpServer) */ \
  \
  MACRO(COPY) \
  MACRO(LOCK) \
  MACRO(MKCOL) \
  MACRO(MOVE) \
  MACRO(PROPFIND) \
  MACRO(PROPPATCH) \
  MACRO(SEARCH) \
  MACRO(UNLOCK) \
  MACRO(ACL) \
  /* WebDAV */ \
  \
  MACRO(REPORT) \
  MACRO(MKACTIVITY) \
  MACRO(CHECKOUT) \
  MACRO(MERGE) \
  /* Subversion */ \
  \
  MACRO(MSEARCH) \
  MACRO(NOTIFY) \
  MACRO(SUBSCRIBE) \
  MACRO(UNSUBSCRIBE)
  /* UPnP */

enum class HttpMethod {
  // Enum of known HTTP methods.
  //
  // We use an enum rather than a string to allow for faster parsing and switching and to reduce
  // ambiguity.
  //
  // The enumerators are generated from KJ_HTTP_FOR_EACH_METHOD, in the order listed there.

#define DECLARE_METHOD(id) id,
KJ_HTTP_FOR_EACH_METHOD(DECLARE_METHOD)
#undef DECLARE_METHOD
};

kj::StringPtr KJ_STRINGIFY(HttpMethod method);
kj::Maybe<HttpMethod> tryParseHttpMethod(kj::StringPtr name);
// Conversions between an HttpMethod and text. tryParseHttpMethod() returns a Maybe, so the lookup
// can fail.
//
// NOTE(review): presumably null is returned for any name not listed in KJ_HTTP_FOR_EACH_METHOD;
// case-sensitivity is not visible from this header -- confirm against the implementation.

class HttpHeaderTable;

class HttpHeaderId {
  // Identifies an HTTP header by numeric ID that indexes into an HttpHeaderTable.
  //
  // The KJ HTTP API prefers that headers be identified by these IDs for a few reasons:
  // - Integer lookups are much more efficient than string lookups.
  // - Case-insensitivity is awkward to deal with when const strings are being passed to the lookup
  //   method.
  // - Writing out strings less often means fewer typos.
  //
  // See HttpHeaderTable for usage hints.

public:
  HttpHeaderId() = default;
  // No member initializers are provided, so a default-initialized HttpHeaderId holds indeterminate
  // `table` / `id` values until it is assigned from a real ID.

  inline bool operator==(const HttpHeaderId& other) const { return id == other.id; }
  inline bool operator!=(const HttpHeaderId& other) const { return id != other.id; }
  inline bool operator< (const HttpHeaderId& other) const { return id <  other.id; }
  inline bool operator> (const HttpHeaderId& other) const { return id >  other.id; }
  inline bool operator<=(const HttpHeaderId& other) const { return id <= other.id; }
  inline bool operator>=(const HttpHeaderId& other) const { return id >= other.id; }
  // Comparisons look only at the numeric `id`; the `table` pointer is not compared, so IDs from
  // different tables with equal numeric values compare equal.

  inline size_t hashCode() const { return id; }
  // Returned value is guaranteed to be small and never collide with other headers on the same
  // table.

  kj::StringPtr toString() const;

  void requireFrom(const HttpHeaderTable& table) const;
  // In debug mode, throws an exception if the HttpHeaderId is not from the given table.
  //
  // In opt mode, no-op.

#define KJ_HTTP_FOR_EACH_BUILTIN_HEADER(MACRO) \
  /* Headers that are always read-only. */ \
  MACRO(CONNECTION, "Connection") \
  MACRO(KEEP_ALIVE, "Keep-Alive") \
  MACRO(TE, "TE") \
  MACRO(TRAILER, "Trailer") \
  MACRO(UPGRADE, "Upgrade") \
  \
  /* Headers that are read-only except in the case of a response to a HEAD request. */ \
  MACRO(CONTENT_LENGTH, "Content-Length") \
  MACRO(TRANSFER_ENCODING, "Transfer-Encoding") \
  \
  /* Headers that are read-only for WebSocket handshakes. */ \
  MACRO(SEC_WEBSOCKET_KEY, "Sec-WebSocket-Key") \
  MACRO(SEC_WEBSOCKET_VERSION, "Sec-WebSocket-Version") \
  MACRO(SEC_WEBSOCKET_ACCEPT, "Sec-WebSocket-Accept") \
  MACRO(SEC_WEBSOCKET_EXTENSIONS, "Sec-WebSocket-Extensions") \
  \
  /* Headers that you can write. */ \
  MACRO(HOST, "Host") \
  MACRO(DATE, "Date") \
  MACRO(LOCATION, "Location") \
  MACRO(CONTENT_TYPE, "Content-Type")
  // For convenience, these headers are valid for all HttpHeaderTables. You can refer to them like:
  //
  //     HttpHeaderId::HOST
  //
  // The order of this list matters: HttpHeaders::BuiltinIndices and the *_CONNECTION_HEADERS_COUNT
  // constants are derived from it.
  //
  // TODO(someday): Fill this out with more common headers.

#define DECLARE_HEADER(id, name) \
  static const HttpHeaderId id;
  // Declare a constant for each builtin header, e.g.: HttpHeaderId::CONNECTION

  KJ_HTTP_FOR_EACH_BUILTIN_HEADER(DECLARE_HEADER);
#undef DECLARE_HEADER

private:
  const HttpHeaderTable* table;
  uint id;

  inline explicit constexpr HttpHeaderId(const HttpHeaderTable* table, uint id)
      : table(table), id(id) {}
  // Private: only the friends below can manufacture an ID bound to a table.
  friend class HttpHeaderTable;
  friend class HttpHeaders;
};

class HttpHeaderTable {
  // Construct an HttpHeaderTable to declare which headers you'll be interested in later on, and
  // to manufacture IDs for them.
  //
  // Example:
  //
  //     // Build a header table with the headers we are interested in.
  //     kj::HttpHeaderTable::Builder builder;
  //     const HttpHeaderId accept = builder.add("Accept");
  //     const HttpHeaderId contentType = builder.add("Content-Type");
  //     kj::HttpHeaderTable table(kj::mv(builder));
  //
  //     // Create an HTTP client.
  //     auto client = kj::newHttpClient(table, network);
  //
  //     // Get http://example.com.
  //     HttpHeaders headers(table);
  //     headers.set(accept, "text/html");
  //     auto response = client->send(kj::HttpMethod::GET, "http://example.com", headers)
  //         .wait(waitScope);
  //     auto msg = kj::str("Response content type: ", response.headers.get(contentType));
  //
  // NOTE(review): The example above does not match the declarations in this header and should be
  // treated as a sketch: Builder exposes build() (returning Own<HttpHeaderTable>) rather than
  // there being an HttpHeaderTable constructor taking a Builder; the newHttpClient() overloads
  // declared below take additional arguments; HttpClient declares request(), not send(); and
  // HttpClient::Response::headers is a pointer.

  struct IdsByNameMap;

public:
  HttpHeaderTable();
  // Constructs a table that only contains the builtin headers.

  class Builder {
    // Accumulates header registrations and produces the finished HttpHeaderTable.
    //
    // NOTE(review): presumably add() registers `name` (returning the existing ID if the name is
    // already known) and build() hands over ownership of the completed table, after which the
    // Builder should not be reused -- confirm against the implementation.

  public:
    Builder();
    HttpHeaderId add(kj::StringPtr name);
    Own<HttpHeaderTable> build();

    HttpHeaderTable& getFutureTable();
    // Get the still-unbuilt header table. You cannot actually use it until build() has been
    // called.
    //
    // This method exists to help when building a shared header table -- the Builder may be passed
    // to several components, each of which will register the headers they need and get a reference
    // to the future table.

  private:
    kj::Own<HttpHeaderTable> table;
  };

  KJ_DISALLOW_COPY(HttpHeaderTable);  // Can't copy because HttpHeaderId points to the table.
  ~HttpHeaderTable() noexcept(false);

  uint idCount() const;
  // Return the number of IDs in the table.

  kj::Maybe<HttpHeaderId> stringToId(kj::StringPtr name) const;
  // Try to find an ID for the given name. The matching is case-insensitive, per the HTTP spec.
  //
  // Note: if `name` contains characters that aren't allowed in HTTP header names, this may return
  // a bogus value rather than null, due to optimizations used in case-insensitive matching.

  kj::StringPtr idToString(HttpHeaderId id) const;
  // Get the canonical string name for the given ID.

private:
  kj::Vector<kj::StringPtr> namesById;
  kj::Own<IdsByNameMap> idsByName;
};

class HttpHeaders {
  // Represents a set of HTTP headers.
  //
  // This class guards against basic HTTP header injection attacks: Trying to set a header name or
  // value containing a newline, carriage return, or other invalid character will throw an
  // exception.

public:
  explicit HttpHeaders(const HttpHeaderTable& table);

  static bool isValidHeaderValue(kj::StringPtr value);
  // This returns whether the value is a valid parameter to the set call. While the HTTP spec
  // suggests that only printable ASCII characters are allowed in header values, in practice that
  // turns out to not be the case. We follow the browser's lead in disallowing \r and \n.
  // https://github.com/httpwg/http11bis/issues/19
  // Use this if you want to validate the value before supplying it to set() if you want to avoid
  // an exception being thrown (e.g. you have custom error reporting). NOTE that set will still
  // validate the value. If performance is a problem this API needs to be adjusted to a
  // `validateHeaderValue` function that returns a special type that set can be confident has
  // already passed through the validation routine.

  KJ_DISALLOW_COPY(HttpHeaders);
  HttpHeaders(HttpHeaders&&) = default;
  HttpHeaders& operator=(HttpHeaders&&) = default;

  size_t size() const;
  // Returns the number of headers that forEach() would iterate over.

  void clear();
  // Clears all contents, as if the object was freshly-allocated. However, calling this rather
  // than actually re-allocating the object may avoid re-allocation of internal objects.

  HttpHeaders clone() const;
  // Creates a deep clone of the HttpHeaders. The returned object owns all strings it references.

  HttpHeaders cloneShallow() const;
  // Creates a shallow clone of the HttpHeaders. The returned object references the same strings
  // as the original, owning none of them.

  bool isWebSocket() const;
  // Convenience method that checks for the presence of the header `Upgrade: websocket`.
  //
  // Note that this does not actually validate that the request is a complete WebSocket handshake
  // with the correct version number -- such validation will occur if and when you call
  // acceptWebSocket().

  kj::Maybe<kj::StringPtr> get(HttpHeaderId id) const;
  // Read a header.

  template <typename Func>
  void forEach(Func&& func) const;
  // Calls `func(name, value)` for each header in the set -- including headers that aren't mapped
  // to IDs in the header table. Both inputs are of type kj::StringPtr.

  template <typename Func1, typename Func2>
  void forEach(Func1&& func1, Func2&& func2) const;
  // Calls `func1(id, value)` for each header in the set that has a registered HttpHeaderId, and
  // `func2(name, value)` for each header that does not. All calls to func1() precede all calls to
  // func2().

  void set(HttpHeaderId id, kj::StringPtr value);
  void set(HttpHeaderId id, kj::String&& value);
  // Sets a header value, overwriting the existing value.
  //
  // The String&& version is equivalent to calling the other version followed by takeOwnership().
  //
  // WARNING: It is the caller's responsibility to ensure that `value` remains valid until the
  //   HttpHeaders object is destroyed. This allows string literals to be passed without making a
  //   copy, but complicates the use of dynamic values. Hint: Consider using `takeOwnership()`.

  void add(kj::StringPtr name, kj::StringPtr value);
  void add(kj::StringPtr name, kj::String&& value);
  void add(kj::String&& name, kj::String&& value);
  // Append a header. `name` will be looked up in the header table, but if it's not mapped, the
  // header will be added to the list of unmapped headers.
  //
  // The String&& versions are equivalent to calling the other version followed by takeOwnership().
  //
  // WARNING: It is the caller's responsibility to ensure that `name` and `value` remain valid
  //   until the HttpHeaders object is destroyed. This allows string literals to be passed without
  //   making a copy, but complicates the use of dynamic values. Hint: Consider using
  //   `takeOwnership()`.

  void unset(HttpHeaderId id);
  // Removes a header.
  //
  // It's not possible to remove a header by string name because non-indexed headers would take
  // O(n) time to remove. Instead, construct a new HttpHeaders object and copy contents.

  void takeOwnership(kj::String&& string);
  void takeOwnership(kj::Array<char>&& chars);
  void takeOwnership(HttpHeaders&& otherHeaders);
  // Takes ownership of a string so that it lives until the HttpHeaders object is destroyed. Useful
  // when you've passed a dynamic value to set() or add() or parse*().

  struct Request {
    // First-line fields of a parsed request; the success alternative of RequestOrProtocolError.
    HttpMethod method;
    kj::StringPtr url;
  };
  struct Response {
    // First-line fields of a parsed response; the success alternative of ResponseOrProtocolError.
    uint statusCode;
    kj::StringPtr statusText;
  };

  struct ProtocolError {
    // Represents a protocol error, such as a bad request method or invalid headers. Debugging such
    // errors is difficult without a copy of the data which we tried to parse, but this data is
    // sensitive, so we can't just lump it into the error description directly. ProtocolError
    // provides this sensitive data separate from the error description.
    //
    // TODO(cleanup): Should maybe not live in HttpHeaders? HttpServerErrorHandler::ProtocolError?
    //   Or HttpProtocolError? Or maybe we need a more general way of attaching sensitive context to
    //   kj::Exceptions?

    uint statusCode;
    // Suggested HTTP status code that should be used when returning an error to the client.
    //
    // Most errors are 400. An unrecognized method will be 501.

    kj::StringPtr statusMessage;
    // HTTP status message to go with `statusCode`, e.g. "Bad Request".

    kj::StringPtr description;
    // An error description safe for all the world to see.

    kj::ArrayPtr<char> rawContent;
    // Unredacted data which led to the error condition. This may contain anything transported over
    // HTTP, to include sensitive PII, so you must take care to sanitize this before using it in any
    // error report that may leak to unprivileged eyes.
    //
    // This ArrayPtr is merely a copy of the `content` parameter passed to `tryParseRequest()` /
    // `tryParseResponse()`, thus it remains valid for as long as a successfully-parsed HttpHeaders
    // object would remain valid.
  };

  using RequestOrProtocolError = kj::OneOf<Request, ProtocolError>;
  using ResponseOrProtocolError = kj::OneOf<Response, ProtocolError>;

  RequestOrProtocolError tryParseRequest(kj::ArrayPtr<char> content);
  ResponseOrProtocolError tryParseResponse(kj::ArrayPtr<char> content);
  // Parse an HTTP header blob and add all the headers to this object.
  //
  // `content` should be all text from the start of the request to the first occurrence of two
  // newlines in a row -- including the first of these two newlines, but excluding the second.
  //
  // The parse is performed with zero copies: The callee clobbers `content` with '\0' characters
  // to split it into a bunch of shorter strings. The caller must keep `content` valid until the
  // `HttpHeaders` is destroyed, or pass it to `takeOwnership()`.

  bool tryParse(kj::ArrayPtr<char> content);
  // Like tryParseRequest()/tryParseResponse(), but don't expect any request/response line.

  kj::String serializeRequest(HttpMethod method, kj::StringPtr url,
                              kj::ArrayPtr<const kj::StringPtr> connectionHeaders = nullptr) const;
  kj::String serializeResponse(uint statusCode, kj::StringPtr statusText,
                               kj::ArrayPtr<const kj::StringPtr> connectionHeaders = nullptr) const;
  // **Most applications will not use these methods; they are called by the HTTP client and server
  // implementations.**
  //
  // Serialize the headers as a complete request or response blob. The blob uses '\r\n' newlines
  // and includes the double-newline to indicate the end of the headers.
  //
  // `connectionHeaders`, if provided, contains connection-level headers supplied by the HTTP
  // implementation, in the order specified by the KJ_HTTP_FOR_EACH_BUILTIN_HEADER macro. These
  // header values override any corresponding header value in the HttpHeaders object. The
  // CONNECTION_HEADERS_COUNT constants below can help you construct this `connectionHeaders` array.

  enum class BuiltinIndicesEnum {
    // One enumerator per builtin header, generated from KJ_HTTP_FOR_EACH_BUILTIN_HEADER, so each
    // enumerator's value is that header's zero-based position in the macro list.
  #define HEADER_ID(id, name) id,
    KJ_HTTP_FOR_EACH_BUILTIN_HEADER(HEADER_ID)
  #undef HEADER_ID
  };

  struct BuiltinIndices {
    // The same positions as BuiltinIndicesEnum, exposed as `uint` constants (e.g.
    // BuiltinIndices::HOST) so they can be used directly as array indices / counts.
  #define HEADER_ID(id, name) static constexpr uint id = static_cast<uint>(BuiltinIndicesEnum::id);
    KJ_HTTP_FOR_EACH_BUILTIN_HEADER(HEADER_ID)
  #undef HEADER_ID
  };

  static constexpr uint HEAD_RESPONSE_CONNECTION_HEADERS_COUNT = BuiltinIndices::CONTENT_LENGTH;
  static constexpr uint CONNECTION_HEADERS_COUNT = BuiltinIndices::SEC_WEBSOCKET_KEY;
  static constexpr uint WEBSOCKET_CONNECTION_HEADERS_COUNT = BuiltinIndices::HOST;
  // Constants for use with HttpHeaders::serialize*().
  //
  // Each constant is the index of the first builtin header *after* a group in
  // KJ_HTTP_FOR_EACH_BUILTIN_HEADER, i.e. the number of headers preceding it. With the current
  // macro order that is 5 (the always-read-only headers), 7 (those plus Content-Length and
  // Transfer-Encoding), and 11 (those plus the four Sec-WebSocket-* headers), respectively.

  kj::String toString() const;

private:
  const HttpHeaderTable* table;

  kj::Array<kj::StringPtr> indexedHeaders;
  // Size is always table->idCount().

  struct Header {
    kj::StringPtr name;
    kj::StringPtr value;
  };
  kj::Vector<Header> unindexedHeaders;

  kj::Vector<kj::Array<char>> ownedStrings;

  void addNoCheck(kj::StringPtr name, kj::StringPtr value);

  kj::StringPtr cloneToOwn(kj::StringPtr str);

  kj::String serialize(kj::ArrayPtr<const char> word1,
                       kj::ArrayPtr<const char> word2,
                       kj::ArrayPtr<const char> word3,
                       kj::ArrayPtr<const kj::StringPtr> connectionHeaders) const;

  bool parseHeaders(char* ptr, char* end);

  // TODO(perf): Arguably we should store a map, but header sets are never very long
  // TODO(perf): We could optimize for common headers by storing them directly as fields. We could
  //   also add direct accessors for those headers.
};

class HttpInputStream {
  // Low-level interface to receive HTTP-formatted messages (headers followed by body) from an
  // input stream, without a paired output stream.
  //
  // Most applications will not use this. Regular HTTP clients and servers don't need this. This
  // is mainly useful for apps implementing various protocols that look like HTTP but aren't
  // really.

public:
  struct Request {
    HttpMethod method;
    kj::StringPtr url;
    const HttpHeaders& headers;
    kj::Own<kj::AsyncInputStream> body;
  };
  virtual kj::Promise<Request> readRequest() = 0;
  // Reads one HTTP request from the input stream.
  //
  // The returned struct contains pointers directly into a buffer that is invalidated on the next
  // message read.

  struct Response {
    uint statusCode;
    kj::StringPtr statusText;
    const HttpHeaders& headers;
    kj::Own<kj::AsyncInputStream> body;
  };
  virtual kj::Promise<Response> readResponse(HttpMethod requestMethod) = 0;
  // Reads one HTTP response from the input stream.
  //
  // You must provide the request method because responses to HEAD requests require special
  // treatment.
  //
  // The returned struct contains pointers directly into a buffer that is invalidated on the next
  // message read.

  struct Message {
    const HttpHeaders& headers;
    kj::Own<kj::AsyncInputStream> body;
  };
  virtual kj::Promise<Message> readMessage() = 0;
  // Reads an HTTP header set followed by a body, with no request or response line. This is not
  // useful for HTTP but may be useful for other protocols that make the unfortunate choice to
  // mimic HTTP message format, such as Visual Studio Code's JSON-RPC transport.
  //
  // The returned struct contains pointers directly into a buffer that is invalidated on the next
  // message read.

  virtual kj::Promise<bool> awaitNextMessage() = 0;
  // Waits until more data is available, but doesn't consume it. Returns false on EOF.
};

class EntropySource {
  // Interface for an object that generates entropy. Typically, cryptographically-random entropy
  // is expected.
  //
  // TODO(cleanup): Put this somewhere more general.

public:
  virtual void generate(kj::ArrayPtr<byte> buffer) = 0;
  // NOTE(review): presumably fills the whole of `buffer` with generated bytes -- confirm with
  // implementations.
};

class WebSocket {
  // Interface representing an open WebSocket session.
  //
  // Each side can send and receive data and "close" messages.
  //
  // Ping/Pong and message fragmentation are not exposed through this interface. These features of
  // the underlying WebSocket protocol are not exposed by the browser-level JavaScript API either,
  // and thus applications typically need to implement these features at the application protocol
  // level instead. The implementation is, however, expected to reply to Ping messages it receives.

public:
  virtual kj::Promise<void> send(kj::ArrayPtr<const byte> message) = 0;
  virtual kj::Promise<void> send(kj::ArrayPtr<const char> message) = 0;
  // Send a message (binary or text). The underlying buffer must remain valid, and you must not
  // call send() again, until the returned promise resolves.

  virtual kj::Promise<void> close(uint16_t code, kj::StringPtr reason) = 0;
  // Send a Close message.
  //
  // Note that the returned Promise resolves once the message has been sent -- it does NOT wait
  // for the other end to send a Close reply. The application should await a reply before dropping
  // the WebSocket object.

  virtual kj::Promise<void> disconnect() = 0;
  // Sends EOF on the underlying connection without sending a "close" message. This is NOT a clean
  // shutdown, but is sometimes useful when you want the other end to trigger whatever behavior
  // it normally triggers when a connection is dropped.

  virtual void abort() = 0;
  // Forcefully close this WebSocket, such that the remote end should get a DISCONNECTED error if
  // it continues to write. This differs from disconnect(), which only closes the sending
  // direction, but still allows receives.

  virtual kj::Promise<void> whenAborted() = 0;
  // Resolves when the remote side aborts the connection such that send() would throw DISCONNECTED,
  // if this can be detected without actually writing a message. (If not, this promise never
  // resolves, but send() or receive() will throw DISCONNECTED when appropriate. See also
  // kj::AsyncOutputStream::whenWriteDisconnected().)

  struct Close {
    uint16_t code;
    kj::String reason;
  };

  typedef kj::OneOf<kj::String, kj::Array<byte>, Close> Message;
  // A received message: text (kj::String), binary (kj::Array<byte>), or a Close.

  static constexpr size_t SUGGESTED_MAX_MESSAGE_SIZE = 1u << 20;  // 1MB

  virtual kj::Promise<Message> receive(size_t maxSize = SUGGESTED_MAX_MESSAGE_SIZE) = 0;
  // Read one message from the WebSocket and return it. Can only call once at a time. Do not call
  // again after Close is received.

  virtual kj::Promise<void> pumpTo(WebSocket& other);
  // Continuously receives messages from this WebSocket and send them to `other`.
  //
  // On EOF, calls other.disconnect(), then resolves.
  //
  // On other read errors, calls other.close() with the error, then resolves.
  //
  // On write error, rejects with the error.

  virtual kj::Maybe<kj::Promise<void>> tryPumpFrom(WebSocket& other);
  // Either returns null, or performs the equivalent of other.pumpTo(*this). Only returns non-null
  // if this WebSocket implementation is able to perform the pump in an optimized way, better than
  // the default implementation of pumpTo(). The default implementation of pumpTo() always tries
  // calling this first, and the default implementation of tryPumpFrom() always returns null.

  virtual uint64_t sentByteCount() = 0;
  virtual uint64_t receivedByteCount() = 0;
  // NOTE(review): presumably running totals of bytes sent / received on this WebSocket; whether
  // framing overhead is included is not visible from this header -- confirm with implementations.
};

class HttpClient {
  // Interface to the client end of an HTTP connection.
  //
  // There are two kinds of clients:
  // * Host clients are used when talking to a specific host. The `url` specified in a request
  //   is actually just a path. (A `Host` header is still required in all requests.)
  // * Proxy clients are used when the target could be any arbitrary host on the internet.
  //   The `url` specified in a request is a full URL including protocol and hostname.

public:
  struct Response {
    uint statusCode;
    kj::StringPtr statusText;
    const HttpHeaders* headers;
    kj::Own<kj::AsyncInputStream> body;
    // `statusText` and `headers` remain valid until `body` is dropped or read from.
  };

  struct Request {
    kj::Own<kj::AsyncOutputStream> body;
    // Write the request entity body to this stream, then drop it when done.
    //
    // May be null for GET and HEAD requests (which have no body) and requests that have
    // Content-Length: 0.

    kj::Promise<Response> response;
    // Promise for the eventual response.
  };

  virtual Request request(HttpMethod method, kj::StringPtr url, const HttpHeaders& headers,
                          kj::Maybe<uint64_t> expectedBodySize = nullptr) = 0;
  // Perform an HTTP request.
  //
  // `url` may be a full URL (with protocol and host) or it may be only the path part of the URL,
  // depending on whether the client is a proxy client or a host client.
  //
  // `url` and `headers` need only remain valid until `request()` returns (they can be
  // stack-allocated).
  //
  // `expectedBodySize`, if provided, must be exactly the number of bytes that will be written to
  // the body. This will trigger use of the `Content-Length` connection header. Otherwise,
  // `Transfer-Encoding: chunked` will be used.

  struct WebSocketResponse {
    uint statusCode;
    kj::StringPtr statusText;
    const HttpHeaders* headers;
    kj::OneOf<kj::Own<kj::AsyncInputStream>, kj::Own<WebSocket>> webSocketOrBody;
    // `statusText` and `headers` remain valid until `webSocketOrBody` is dropped or read from.
  };
  virtual kj::Promise<WebSocketResponse> openWebSocket(
      kj::StringPtr url, const HttpHeaders& headers);
  // Tries to open a WebSocket. Default implementation calls send() and never returns a WebSocket.
  //
  // NOTE(review): HttpClient declares no send() method; the sentence above presumably refers to
  // request() -- confirm against the implementation.
  //
  // `url` and `headers` need only remain valid until `openWebSocket()` returns (they can be
  // stack-allocated).

  virtual kj::Promise<kj::Own<kj::AsyncIoStream>> connect(kj::StringPtr host);
  // Handles CONNECT requests. Only relevant for proxy clients. Default implementation throws
  // UNIMPLEMENTED.
};

class HttpService {
  // Interface which HTTP services should implement.
  //
  // This interface is functionally equivalent to HttpClient, but is intended for applications to
  // implement rather than call. The ergonomics and performance of the method signatures are
  // optimized for the serving end.
  //
  // As with clients, there are two kinds of services:
  // * Host services are used when talking to a specific host. The `url` specified in a request
  //   is actually just a path. (A `Host` header is still required in all requests, and the service
  //   may in fact serve multiple origins via this header.)
  // * Proxy services are used when the target could be any arbitrary host on the internet, i.e. to
  //   implement an HTTP proxy. The `url` specified in a request is a full URL including protocol
  //   and hostname.

public:
  class Response {
  public:
    virtual kj::Own<kj::AsyncOutputStream> send(
        uint statusCode, kj::StringPtr statusText, const HttpHeaders& headers,
        kj::Maybe<uint64_t> expectedBodySize = nullptr) = 0;
    // Begin the response.
    //
    // `statusText` and `headers` need only remain valid until send() returns (they can be
    // stack-allocated).

    virtual kj::Own<WebSocket> acceptWebSocket(const HttpHeaders& headers) = 0;
    // If headers.isWebSocket() is true then you can call acceptWebSocket() instead of send().

    kj::Promise<void> sendError(uint statusCode, kj::StringPtr statusText,
                                const HttpHeaders& headers);
    kj::Promise<void> sendError(uint statusCode, kj::StringPtr statusText,
                                const HttpHeaderTable& headerTable);
    // Convenience wrapper around send() which sends a basic error. A generic error page specifying
    // the error code is sent as the body.
    //
    // You must provide headers or a header table because downstream service wrappers may be
    // expecting response headers built with a particular table so that they can insert additional
    // headers.
  };

  virtual kj::Promise<void> request(
      HttpMethod method, kj::StringPtr url, const HttpHeaders& headers,
      kj::AsyncInputStream& requestBody, Response& response) = 0;
  // Perform an HTTP request.
  //
  // `url` may be a full URL (with protocol and host) or it may be only the path part of the URL,
  // depending on whether the service is a proxy service or a host service.
  //
  // `url` and `headers` are invalidated on the first read from `requestBody` or when the returned
  // promise resolves, whichever comes first.
  //
  // Request processing can be canceled by dropping the returned promise. HttpServer may do so if
  // the client disconnects prematurely.

  virtual kj::Promise<kj::Own<kj::AsyncIoStream>> connect(kj::StringPtr host);
  // Handles CONNECT requests. Only relevant for proxy services. Default implementation throws
  // UNIMPLEMENTED.
};

class HttpClientErrorHandler {
  // Customization point for how an HttpClient reacts to protocol errors; installed via
  // HttpClientSettings::errorHandler.

public:
  virtual HttpClient::Response handleProtocolError(HttpHeaders::ProtocolError protocolError);
  // Override this function to customize error handling when the client receives an HTTP message
  // that fails to parse. The default implementation throws an exception.
  //
  // There are two main use cases for overriding this:
  // 1. `protocolError` contains the actual header content that failed to parse, giving you the
  //    opportunity to log it for debugging purposes. The default implementation throws away this
  //    content.
  // 2. You could potentially convert protocol errors into HTTP error codes, e.g. 502 Bad Gateway.
  //
  // Note that `protocolError` may contain pointers into buffers that are no longer valid once
  // this method returns; you will have to make copies if you want to keep them.

  virtual HttpClient::WebSocketResponse handleWebSocketProtocolError(
      HttpHeaders::ProtocolError protocolError);
  // Like handleProtocolError() but for WebSocket requests. The default implementation calls
  // handleProtocolError() and converts the Response to WebSocketResponse. There is probably very
  // little reason to override this.
};

struct HttpClientSettings {
  kj::Duration idleTimeout = 5 * kj::SECONDS;
  // For clients which automatically create new connections, any connection idle for at least this
  // long will be closed. Set this to 0 to prevent connection reuse entirely.

  kj::Maybe<EntropySource&> entropySource = nullptr;
  // Must be provided in order to use `openWebSocket`. If you don't need WebSockets, this can be
  // omitted. The WebSocket protocol uses random values to avoid triggering flaws (including
  // security flaws) in certain HTTP proxy software. Specifically, entropy is used to generate the
  // `Sec-WebSocket-Key` header and to generate frame masks. If you know that there are no broken
  // or vulnerable proxies between you and the server, you can provide a dummy entropy source that
  // doesn't generate real entropy (e.g. returning the same value every time). Otherwise, you must
  // provide a cryptographically-random entropy source.

  kj::Maybe<HttpClientErrorHandler&> errorHandler = nullptr;
  // Customize how protocol errors are handled by the HttpClient. If null, HttpClientErrorHandler's
  // default implementation will be used.
};

kj::Own<HttpClient> newHttpClient(kj::Timer& timer, const HttpHeaderTable& responseHeaderTable,
                                  kj::Network& network, kj::Maybe<kj::Network&> tlsNetwork,
                                  HttpClientSettings settings = HttpClientSettings());
// Creates a proxy HttpClient that connects to hosts over the given network. The URL must always
// be an absolute URL; the host is parsed from the URL. This implementation will automatically
// add an appropriate Host header (and convert the URL to just a path) once it has connected.
//
// Note that if you wish to route traffic through an HTTP proxy server rather than connect to
// remote hosts directly, you should use the form of newHttpClient() that takes a NetworkAddress,
// and supply the proxy's address.
//
// `responseHeaderTable` is used when parsing HTTP responses. Requests can use any header table.
//
// `tlsNetwork` is required to support HTTPS destination URLs. If null, only HTTP URLs can be
// fetched.

kj::Own<HttpClient> newHttpClient(kj::Timer& timer, const HttpHeaderTable& responseHeaderTable,
                                  kj::NetworkAddress& addr,
                                  HttpClientSettings settings = HttpClientSettings());
// Creates an HttpClient that always connects to the given address no matter what URL is requested.
// The client will open and close connections as needed. It will attempt to reuse connections for
// multiple requests but will not send a new request before the previous response on the same
// connection has completed, as doing so can result in head-of-line blocking issues. The client may
// be used as a proxy client or a host client depending on whether the peer is operating as
// a proxy. (Hint: This is the best kind of client to use when routing traffic through an HTTP
// proxy. `addr` should be the address of the proxy, and the proxy itself will resolve remote hosts
// based on the URLs passed to it.)
//
// `responseHeaderTable` is used when parsing HTTP responses. Requests can use any header table.

kj::Own<HttpClient> newHttpClient(const HttpHeaderTable& responseHeaderTable,
                                  kj::AsyncIoStream& stream,
                                  HttpClientSettings settings = HttpClientSettings());
// Creates an HttpClient that speaks over the given pre-established connection. The client may
// be used as a proxy client or a host client depending on whether the peer is operating as
// a proxy.
//
// Note that since this client has only one stream to work with, it will try to pipeline all
// requests on this stream. If one request or response has an I/O failure, all subsequent requests
// fail as well. If the destination server chooses to close the connection after a response,
// subsequent requests will fail. If a response takes a long time, it blocks subsequent responses.
// If a WebSocket is opened successfully, all subsequent requests fail.

kj::Own<HttpClient> newConcurrencyLimitingHttpClient(
    HttpClient& inner, uint maxConcurrentRequests,
    kj::Function<void(uint runningCount, uint pendingCount)> countChangedCallback);
// Creates an HttpClient that is limited to a maximum number of concurrent requests. Additional
// requests are queued, to be opened only after an open request completes.
`countChangedCallback` 802 // is called when a new connection is opened or enqueued and when an open connection is closed, 803 // passing the number of open and pending connections. 804 805 kj::Own<HttpClient> newHttpClient(HttpService& service); 806 kj::Own<HttpService> newHttpService(HttpClient& client); 807 // Adapts an HttpClient to an HttpService and vice versa. 808 809 kj::Own<HttpInputStream> newHttpInputStream( 810 kj::AsyncInputStream& input, const HttpHeaderTable& headerTable); 811 // Create an HttpInputStream on top of the given stream. Normally applications would not call this 812 // directly, but it can be useful for implementing protocols that aren't quite HTTP but use similar 813 // message delimiting. 814 // 815 // The HttpInputStream implementation does read-ahead buffering on `input`. Therefore, when the 816 // HttpInputStream is destroyed, some data read from `input` may be lost, so it's not possible to 817 // continue reading from `input` in a reliable way. 818 819 kj::Own<WebSocket> newWebSocket(kj::Own<kj::AsyncIoStream> stream, 820 kj::Maybe<EntropySource&> maskEntropySource); 821 // Create a new WebSocket on top of the given stream. It is assumed that the HTTP -> WebSocket 822 // upgrade handshake has already occurred (or is not needed), and messages can immediately be 823 // sent and received on the stream. Normally applications would not call this directly. 824 // 825 // `maskEntropySource` is used to generate cryptographically-random frame masks. If null, outgoing 826 // frames will not be masked. Servers are required NOT to mask their outgoing frames, but clients 827 // ARE required to do so. So, on the client side, you MUST specify an entropy source. The mask 828 // must be crytographically random if the data being sent on the WebSocket may be malicious. 
The 829 // purpose of the mask is to prevent badly-written HTTP proxies from interpreting "things that look 830 // like HTTP requests" in a message as being actual HTTP requests, which could result in cache 831 // poisoning. See RFC6455 section 10.3. 832 833 struct WebSocketPipe { 834 kj::Own<WebSocket> ends[2]; 835 }; 836 837 WebSocketPipe newWebSocketPipe(); 838 // Create a WebSocket pipe. Messages written to one end of the pipe will be readable from the other 839 // end. No buffering occurs -- a message send does not complete until a corresponding receive 840 // accepts the message. 841 842 class HttpServerErrorHandler; 843 class HttpServerCallbacks; 844 845 struct HttpServerSettings { 846 kj::Duration headerTimeout = 15 * kj::SECONDS; 847 // After initial connection open, or after receiving the first byte of a pipelined request, 848 // the client must send the complete request within this time. 849 850 kj::Duration pipelineTimeout = 5 * kj::SECONDS; 851 // After one request/response completes, we'll wait up to this long for a pipelined request to 852 // arrive. 853 854 kj::Duration canceledUploadGracePeriod = 1 * kj::SECONDS; 855 size_t canceledUploadGraceBytes = 65536; 856 // If the HttpService sends a response and returns without having read the entire request body, 857 // then we have to decide whether to close the connection or wait for the client to finish the 858 // request so that it can pipeline the next one. We'll give them a grace period defined by the 859 // above two values -- if they hit either one, we'll close the socket, but if the request 860 // completes, we'll let the connection stay open to handle more requests. 861 862 kj::Maybe<HttpServerErrorHandler&> errorHandler = nullptr; 863 // Customize how client protocol errors and service application exceptions are handled by the 864 // HttpServer. If null, HttpServerErrorHandler's default implementation will be used. 
865 866 kj::Maybe<HttpServerCallbacks&> callbacks = nullptr; 867 // Additional optional callbacks used to control some server behavior. 868 }; 869 870 class HttpServerErrorHandler { 871 public: 872 virtual kj::Promise<void> handleClientProtocolError( 873 HttpHeaders::ProtocolError protocolError, kj::HttpService::Response& response); 874 virtual kj::Promise<void> handleApplicationError( 875 kj::Exception exception, kj::Maybe<kj::HttpService::Response&> response); 876 virtual kj::Promise<void> handleNoResponse(kj::HttpService::Response& response); 877 // Override these functions to customize error handling during the request/response cycle. 878 // 879 // Client protocol errors arise when the server receives an HTTP message that fails to parse. As 880 // such, HttpService::request() will not have been called yet, and the handler is always 881 // guaranteed an opportunity to send a response. The default implementation of 882 // handleClientProtocolError() replies with a 400 Bad Request response. 883 // 884 // Application errors arise when HttpService::request() throws an exception. The default 885 // implementation of handleApplicationError() maps the following exception types to HTTP statuses, 886 // and generates bodies from the stringified exceptions: 887 // 888 // - OVERLOADED: 503 Service Unavailable 889 // - UNIMPLEMENTED: 501 Not Implemented 890 // - DISCONNECTED: (no response) 891 // - FAILED: 500 Internal Server Error 892 // 893 // No-response errors occur when HttpService::request() allows its promise to settle before 894 // sending a response. The default implementation of handleNoResponse() replies with a 500 895 // Internal Server Error response. 896 // 897 // Unlike `HttpService::request()`, when calling `response.send()` in the context of one of these 898 // functions, a "Connection: close" header will be added, and the connection will be closed. 
899 // 900 // Also unlike `HttpService::request()`, it is okay to return kj::READY_NOW without calling 901 // `response.send()`. In this case, no response will be sent, and the connection will be closed. 902 }; 903 904 class HttpServerCallbacks { 905 public: 906 virtual bool shouldClose() { return false; } 907 // Whenever the HttpServer begins response headers, it will check `shouldClose()` to decide 908 // whether to send a `Connection: close` header and close the connection. 909 // 910 // This can be useful e.g. if the server has too many connections open and wants to shed some 911 // of them. Note that to implement graceful shutdown of a server, you should use 912 // `HttpServer::drain()` instead. 913 }; 914 915 class HttpServer final: private kj::TaskSet::ErrorHandler { 916 // Class which listens for requests on ports or connections and sends them to an HttpService. 917 918 public: 919 typedef HttpServerSettings Settings; 920 typedef kj::Function<kj::Own<HttpService>(kj::AsyncIoStream&)> HttpServiceFactory; 921 922 HttpServer(kj::Timer& timer, const HttpHeaderTable& requestHeaderTable, HttpService& service, 923 Settings settings = Settings()); 924 // Set up an HttpServer that directs incoming connections to the given service. The service 925 // may be a host service or a proxy service depending on whether you are intending to implement 926 // an HTTP server or an HTTP proxy. 927 928 HttpServer(kj::Timer& timer, const HttpHeaderTable& requestHeaderTable, 929 HttpServiceFactory serviceFactory, Settings settings = Settings()); 930 // Like the other constructor, but allows a new HttpService object to be used for each 931 // connection, based on the connection object. This is particularly useful for capturing the 932 // client's IP address and injecting it as a header. 933 934 kj::Promise<void> drain(); 935 // Stop accepting new connections or new requests on existing connections. Finish any requests 936 // that are already executing, then close the connections. 
Returns once no more requests are 937 // in-flight. 938 939 kj::Promise<void> listenHttp(kj::ConnectionReceiver& port); 940 // Accepts HTTP connections on the given port and directs them to the handler. 941 // 942 // The returned promise never completes normally. It may throw if port.accept() throws. Dropping 943 // the returned promise will cause the server to stop listening on the port, but already-open 944 // connections will continue to be served. Destroy the whole HttpServer to cancel all I/O. 945 946 kj::Promise<void> listenHttp(kj::Own<kj::AsyncIoStream> connection); 947 // Reads HTTP requests from the given connection and directs them to the handler. A successful 948 // completion of the promise indicates that all requests received on the connection resulted in 949 // a complete response, and the client closed the connection gracefully or drain() was called. 950 // The promise throws if an unparseable request is received or if some I/O error occurs. Dropping 951 // the returned promise will cancel all I/O on the connection and cancel any in-flight requests. 952 953 kj::Promise<bool> listenHttpCleanDrain(kj::AsyncIoStream& connection); 954 // Like listenHttp(), but allows you to potentially drain the server without closing connections. 955 // The returned promise resolves to `true` if the connection has been left in a state where a 956 // new HttpServer could potentially accept further requests from it. If `false`, then the 957 // connection is either in an inconsistent state or already completed a closing handshake; the 958 // caller should close it without any further reads/writes. Note this only ever returns `true` 959 // if you called `drain()` -- otherwise this server would keep handling the connection. 
960 961 private: 962 class Connection; 963 964 kj::Timer& timer; 965 const HttpHeaderTable& requestHeaderTable; 966 kj::OneOf<HttpService*, HttpServiceFactory> service; 967 Settings settings; 968 969 bool draining = false; 970 kj::ForkedPromise<void> onDrain; 971 kj::Own<kj::PromiseFulfiller<void>> drainFulfiller; 972 973 uint connectionCount = 0; 974 kj::Maybe<kj::Own<kj::PromiseFulfiller<void>>> zeroConnectionsFulfiller; 975 976 kj::TaskSet tasks; 977 978 HttpServer(kj::Timer& timer, const HttpHeaderTable& requestHeaderTable, 979 kj::OneOf<HttpService*, HttpServiceFactory> service, 980 Settings settings, kj::PromiseFulfillerPair<void> paf); 981 982 kj::Promise<void> listenLoop(kj::ConnectionReceiver& port); 983 984 void taskFailed(kj::Exception&& exception) override; 985 }; 986 987 // ======================================================================================= 988 // inline implementation 989 990 inline void HttpHeaderId::requireFrom(const HttpHeaderTable& table) const { 991 KJ_IREQUIRE(this->table == nullptr || this->table == &table, 992 "the provided HttpHeaderId is from the wrong HttpHeaderTable"); 993 } 994 995 inline kj::Own<HttpHeaderTable> HttpHeaderTable::Builder::build() { return kj::mv(table); } 996 inline HttpHeaderTable& HttpHeaderTable::Builder::getFutureTable() { return *table; } 997 998 inline uint HttpHeaderTable::idCount() const { return namesById.size(); } 999 1000 inline kj::StringPtr HttpHeaderTable::idToString(HttpHeaderId id) const { 1001 id.requireFrom(*this); 1002 return namesById[id.id]; 1003 } 1004 1005 inline kj::Maybe<kj::StringPtr> HttpHeaders::get(HttpHeaderId id) const { 1006 id.requireFrom(*table); 1007 auto result = indexedHeaders[id.id]; 1008 return result == nullptr ? 
kj::Maybe<kj::StringPtr>(nullptr) : result; 1009 } 1010 1011 inline void HttpHeaders::unset(HttpHeaderId id) { 1012 id.requireFrom(*table); 1013 indexedHeaders[id.id] = nullptr; 1014 } 1015 1016 template <typename Func> 1017 inline void HttpHeaders::forEach(Func&& func) const { 1018 for (auto i: kj::indices(indexedHeaders)) { 1019 if (indexedHeaders[i] != nullptr) { 1020 func(table->idToString(HttpHeaderId(table, i)), indexedHeaders[i]); 1021 } 1022 } 1023 1024 for (auto& header: unindexedHeaders) { 1025 func(header.name, header.value); 1026 } 1027 } 1028 1029 template <typename Func1, typename Func2> 1030 inline void HttpHeaders::forEach(Func1&& func1, Func2&& func2) const { 1031 for (auto i: kj::indices(indexedHeaders)) { 1032 if (indexedHeaders[i] != nullptr) { 1033 func1(HttpHeaderId(table, i), indexedHeaders[i]); 1034 } 1035 } 1036 1037 for (auto& header: unindexedHeaders) { 1038 func2(header.name, header.value); 1039 } 1040 } 1041 1042 } // namespace kj