serialize.h (11551B)
// Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors
// Licensed under the MIT License:
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

// This file implements a simple serialization format for Cap'n Proto messages. The format
// is as follows:
//
// * 32-bit little-endian segment count minus one (4 bytes). (There is always at least one
//   segment, so a stored value of 0 means one segment.)
// * 32-bit little-endian size of each segment, in 8-byte words (4*(segment count) bytes).
// * Padding so that subsequent data is 64-bit-aligned (0 or 4 bytes). (I.e., if there are an even
//   number of segments, there are 4 bytes of zeros here, otherwise there is no padding.)
// * Data from each segment, in order (8*sum(segment sizes) bytes)
//
// This format has some important properties:
// - It is self-delimiting, so multiple messages may be written to a stream without any external
//   delimiter.
34 // - The total size and position of each segment can be determined by reading only the first part 35 // of the message, allowing lazy and random-access reading of the segment data. 36 // - A message is always at least 8 bytes. 37 // - A single-segment message can be read entirely in two system calls with no buffering. 38 // - A multi-segment message can be read entirely in three system calls with no buffering. 39 // - The format is appropriate for mmap()ing since all data is aligned. 40 41 #pragma once 42 43 #include "message.h" 44 #include <kj/io.h> 45 46 CAPNP_BEGIN_HEADER 47 48 namespace capnp { 49 50 class FlatArrayMessageReader: public MessageReader { 51 // Parses a message from a flat array. Note that it makes sense to use this together with mmap() 52 // for extremely fast parsing. 53 54 public: 55 FlatArrayMessageReader(kj::ArrayPtr<const word> array, ReaderOptions options = ReaderOptions()); 56 // The array must remain valid until the MessageReader is destroyed. 57 58 kj::ArrayPtr<const word> getSegment(uint id) override; 59 60 const word* getEnd() const { return end; } 61 // Get a pointer just past the end of the message as determined by reading the message header. 62 // This could actually be before the end of the input array. This pointer is useful e.g. if 63 // you know that the input array has extra stuff appended after the message and you want to 64 // get at it. 65 66 private: 67 // Optimize for single-segment case. 
68 kj::ArrayPtr<const word> segment0; 69 kj::Array<kj::ArrayPtr<const word>> moreSegments; 70 const word* end; 71 }; 72 73 kj::ArrayPtr<const word> initMessageBuilderFromFlatArrayCopy( 74 kj::ArrayPtr<const word> array, MessageBuilder& target, 75 ReaderOptions options = ReaderOptions()); 76 // Convenience function which reads a message using `FlatArrayMessageReader` then copies the 77 // content into the target `MessageBuilder`, verifying that the message structure is valid 78 // (although not necessarily that it matches the desired schema). 79 // 80 // Returns an ArrayPtr containing any words left over in the array after consuming the whole 81 // message. This is useful when reading multiple messages that have been concatenated. See also 82 // FlatArrayMessageReader::getEnd(). 83 // 84 // (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one 85 // of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not 86 // safe to use on untrusted input. Therefore, we do not provide a convenience method for it.) 87 88 kj::Array<word> messageToFlatArray(MessageBuilder& builder); 89 // Constructs a flat array containing the entire content of the given message. 90 // 91 // To output the message as bytes, use `.asBytes()` on the returned word array. Keep in mind that 92 // `asBytes()` returns an ArrayPtr, so you have to save the Array as well to prevent it from being 93 // deleted. For example: 94 // 95 // kj::Array<capnp::word> words = messageToFlatArray(myMessage); 96 // kj::ArrayPtr<kj::byte> bytes = words.asBytes(); 97 // write(fd, bytes.begin(), bytes.size()); 98 99 kj::Array<word> messageToFlatArray(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments); 100 // Version of messageToFlatArray that takes a raw segment array. 101 102 size_t computeSerializedSizeInWords(MessageBuilder& builder); 103 // Returns the size, in words, that will be needed to serialize the message, including the header. 
104 105 size_t computeSerializedSizeInWords(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments); 106 // Version of computeSerializedSizeInWords that takes a raw segment array. 107 108 size_t expectedSizeInWordsFromPrefix(kj::ArrayPtr<const word> messagePrefix); 109 // Given a prefix of a serialized message, try to determine the expected total size of the message, 110 // in words. The returned size is based on the information known so far; it may be an underestimate 111 // if the prefix doesn't contain the full segment table. 112 // 113 // If the returned value is greater than `messagePrefix.size()`, then the message is not yet 114 // complete and the app cannot parse it yet. If the returned value is less than or equal to 115 // `messagePrefix.size()`, then the returned value is the exact total size of the message; any 116 // remaining bytes are part of the next message. 117 // 118 // This function is useful when reading messages from a stream in an asynchronous way, but when 119 // using the full KJ async infrastructure would be too difficult. Each time bytes are received, 120 // use this function to determine if an entire message is ready to be parsed. 121 122 // ======================================================================================= 123 124 class InputStreamMessageReader: public MessageReader { 125 // A MessageReader that reads from an abstract kj::InputStream. See also StreamFdMessageReader 126 // for a subclass specific to file descriptors. 127 128 public: 129 InputStreamMessageReader(kj::InputStream& inputStream, 130 ReaderOptions options = ReaderOptions(), 131 kj::ArrayPtr<word> scratchSpace = nullptr); 132 ~InputStreamMessageReader() noexcept(false); 133 134 // implements MessageReader ---------------------------------------- 135 kj::ArrayPtr<const word> getSegment(uint id) override; 136 137 private: 138 kj::InputStream& inputStream; 139 byte* readPos; 140 141 // Optimize for single-segment case. 
142 kj::ArrayPtr<const word> segment0; 143 kj::Array<kj::ArrayPtr<const word>> moreSegments; 144 145 kj::Array<word> ownedSpace; 146 // Only if scratchSpace wasn't big enough. 147 148 kj::UnwindDetector unwindDetector; 149 }; 150 151 void readMessageCopy(kj::InputStream& input, MessageBuilder& target, 152 ReaderOptions options = ReaderOptions(), 153 kj::ArrayPtr<word> scratchSpace = nullptr); 154 // Convenience function which reads a message using `InputStreamMessageReader` then copies the 155 // content into the target `MessageBuilder`, verifying that the message structure is valid 156 // (although not necessarily that it matches the desired schema). 157 // 158 // (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one 159 // of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not 160 // safe to use on untrusted input. Therefore, we do not provide a convenience method for it.) 161 162 void writeMessage(kj::OutputStream& output, MessageBuilder& builder); 163 // Write the message to the given output stream. 164 165 void writeMessage(kj::OutputStream& output, kj::ArrayPtr<const kj::ArrayPtr<const word>> segments); 166 // Write the segment array to the given output stream. 167 168 // ======================================================================================= 169 // Specializations for reading from / writing to file descriptors. 170 171 class StreamFdMessageReader: private kj::FdInputStream, public InputStreamMessageReader { 172 // A MessageReader that reads from a stream-based file descriptor. 173 174 public: 175 StreamFdMessageReader(int fd, ReaderOptions options = ReaderOptions(), 176 kj::ArrayPtr<word> scratchSpace = nullptr) 177 : FdInputStream(fd), InputStreamMessageReader(*this, options, scratchSpace) {} 178 // Read message from a file descriptor, without taking ownership of the descriptor. 
179 180 StreamFdMessageReader(kj::AutoCloseFd fd, ReaderOptions options = ReaderOptions(), 181 kj::ArrayPtr<word> scratchSpace = nullptr) 182 : FdInputStream(kj::mv(fd)), InputStreamMessageReader(*this, options, scratchSpace) {} 183 // Read a message from a file descriptor, taking ownership of the descriptor. 184 185 ~StreamFdMessageReader() noexcept(false); 186 }; 187 188 void readMessageCopyFromFd(int fd, MessageBuilder& target, 189 ReaderOptions options = ReaderOptions(), 190 kj::ArrayPtr<word> scratchSpace = nullptr); 191 // Convenience function which reads a message using `StreamFdMessageReader` then copies the 192 // content into the target `MessageBuilder`, verifying that the message structure is valid 193 // (although not necessarily that it matches the desired schema). 194 // 195 // (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one 196 // of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not 197 // safe to use on untrusted input. Therefore, we do not provide a convenience method for it.) 198 199 void writeMessageToFd(int fd, MessageBuilder& builder); 200 // Write the message to the given file descriptor. 201 // 202 // This function throws an exception on any I/O error. If your code is not exception-safe, be sure 203 // you catch this exception at the call site. If throwing an exception is not acceptable, you 204 // can implement your own OutputStream with arbitrary error handling and then use writeMessage(). 205 206 void writeMessageToFd(int fd, kj::ArrayPtr<const kj::ArrayPtr<const word>> segments); 207 // Write the segment array to the given file descriptor. 208 // 209 // This function throws an exception on any I/O error. If your code is not exception-safe, be sure 210 // you catch this exception at the call site. If throwing an exception is not acceptable, you 211 // can implement your own OutputStream with arbitrary error handling and then use writeMessage(). 
212 213 // ======================================================================================= 214 // inline stuff 215 216 inline kj::Array<word> messageToFlatArray(MessageBuilder& builder) { 217 return messageToFlatArray(builder.getSegmentsForOutput()); 218 } 219 220 inline size_t computeSerializedSizeInWords(MessageBuilder& builder) { 221 return computeSerializedSizeInWords(builder.getSegmentsForOutput()); 222 } 223 224 inline void writeMessage(kj::OutputStream& output, MessageBuilder& builder) { 225 writeMessage(output, builder.getSegmentsForOutput()); 226 } 227 228 inline void writeMessageToFd(int fd, MessageBuilder& builder) { 229 writeMessageToFd(fd, builder.getSegmentsForOutput()); 230 } 231 232 } // namespace capnp 233 234 CAPNP_END_HEADER