capnproto

FORK: Cap'n Proto serialization/RPC system - core tools and C++ library
git clone https://git.neptards.moe/neptards/capnproto.git
Log | Files | Refs | README | LICENSE

serialize.h (11551B)


      1 // Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors
      2 // Licensed under the MIT License:
      3 //
      4 // Permission is hereby granted, free of charge, to any person obtaining a copy
      5 // of this software and associated documentation files (the "Software"), to deal
      6 // in the Software without restriction, including without limitation the rights
      7 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      8 // copies of the Software, and to permit persons to whom the Software is
      9 // furnished to do so, subject to the following conditions:
     10 //
     11 // The above copyright notice and this permission notice shall be included in
     12 // all copies or substantial portions of the Software.
     13 //
     14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     17 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     18 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     19 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     20 // THE SOFTWARE.
     21 
     22 // This file implements a simple serialization format for Cap'n Proto messages.  The format
     23 // is as follows:
     24 //
     25 // * 32-bit little-endian value equal to the segment count minus one (4 bytes).
     26 // * 32-bit little-endian size of each segment (4*(segment count) bytes).
     27 // * Padding so that subsequent data is 64-bit-aligned (0 or 4 bytes).  (I.e., if there are an even
     28 //     number of segments, there are 4 bytes of zeros here, otherwise there is no padding.)
     29 // * Data from each segment, in order (8*sum(segment sizes) bytes)
     30 //
     31 // This format has some important properties:
     32 // - It is self-delimiting, so multiple messages may be written to a stream without any external
     33 //   delimiter.
     34 // - The total size and position of each segment can be determined by reading only the first part
     35 //   of the message, allowing lazy and random-access reading of the segment data.
     36 // - A message is always at least 8 bytes.
     37 // - A single-segment message can be read entirely in two system calls with no buffering.
     38 // - A multi-segment message can be read entirely in three system calls with no buffering.
     39 // - The format is appropriate for mmap()ing since all data is aligned.
     40 
     41 #pragma once
     42 
     43 #include "message.h"
     44 #include <kj/io.h>
     45 
     46 CAPNP_BEGIN_HEADER
     47 
     48 namespace capnp {
     49 
     50 class FlatArrayMessageReader: public MessageReader {
     51   // Parses a message from a flat array.  Note that it makes sense to use this together with mmap()
     52   // for extremely fast parsing.
     53 
     54 public:
     55   FlatArrayMessageReader(kj::ArrayPtr<const word> array, ReaderOptions options = ReaderOptions());
     56   // The array must remain valid until the MessageReader is destroyed.
     57 
     58   kj::ArrayPtr<const word> getSegment(uint id) override;
     59 
     60   const word* getEnd() const { return end; }
     61   // Get a pointer just past the end of the message as determined by reading the message header.
     62   // This could actually be before the end of the input array.  This pointer is useful e.g. if
     63   // you know that the input array has extra stuff appended after the message and you want to
     64   // get at it.
     65 
     66 private:
     67   // Optimize for single-segment case.
     68   kj::ArrayPtr<const word> segment0;
     69   kj::Array<kj::ArrayPtr<const word>> moreSegments;
     70   const word* end;
     71 };
     72 
     73 kj::ArrayPtr<const word> initMessageBuilderFromFlatArrayCopy(
     74     kj::ArrayPtr<const word> array, MessageBuilder& target,
     75     ReaderOptions options = ReaderOptions());
     76 // Convenience function which reads a message using `FlatArrayMessageReader` then copies the
     77 // content into the target `MessageBuilder`, verifying that the message structure is valid
     78 // (although not necessarily that it matches the desired schema).
     79 //
     80 // Returns an ArrayPtr containing any words left over in the array after consuming the whole
     81 // message. This is useful when reading multiple messages that have been concatenated. See also
     82 // FlatArrayMessageReader::getEnd().
     83 //
     84 // (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one
     85 // of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not
     86 // safe to use on untrusted input. Therefore, we do not provide a convenience method for it.)
     87 
     88 kj::Array<word> messageToFlatArray(MessageBuilder& builder);
     89 // Constructs a flat array containing the entire content of the given message.
     90 //
     91 // To output the message as bytes, use `.asBytes()` on the returned word array. Keep in mind that
     92 // `asBytes()` returns an ArrayPtr, so you have to save the Array as well to prevent it from being
     93 // deleted. For example:
     94 //
     95 //     kj::Array<capnp::word> words = messageToFlatArray(myMessage);
     96 //     kj::ArrayPtr<kj::byte> bytes = words.asBytes();
     97 //     write(fd, bytes.begin(), bytes.size());
     98 
     99 kj::Array<word> messageToFlatArray(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
    100 // Version of messageToFlatArray that takes a raw segment array.
    101 
    102 size_t computeSerializedSizeInWords(MessageBuilder& builder);
    103 // Returns the size, in words, that will be needed to serialize the message, including the header.
    104 
    105 size_t computeSerializedSizeInWords(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
    106 // Version of computeSerializedSizeInWords that takes a raw segment array.
    107 
    108 size_t expectedSizeInWordsFromPrefix(kj::ArrayPtr<const word> messagePrefix);
    109 // Given a prefix of a serialized message, try to determine the expected total size of the message,
    110 // in words. The returned size is based on the information known so far; it may be an underestimate
    111 // if the prefix doesn't contain the full segment table.
    112 //
    113 // If the returned value is greater than `messagePrefix.size()`, then the message is not yet
    114 // complete and the app cannot parse it yet. If the returned value is less than or equal to
    115 // `messagePrefix.size()`, then the returned value is the exact total size of the message; any
    116 // remaining bytes are part of the next message.
    117 //
    118 // This function is useful when reading messages from a stream in an asynchronous way, but when
    119 // using the full KJ async infrastructure would be too difficult. Each time bytes are received,
    120 // use this function to determine if an entire message is ready to be parsed.
    121 
    122 // =======================================================================================
    123 
    124 class InputStreamMessageReader: public MessageReader {
    125   // A MessageReader that reads from an abstract kj::InputStream. See also StreamFdMessageReader
    126   // for a subclass specific to file descriptors.
    127 
    128 public:
    129   InputStreamMessageReader(kj::InputStream& inputStream,
    130                            ReaderOptions options = ReaderOptions(),
    131                            kj::ArrayPtr<word> scratchSpace = nullptr);
    132   ~InputStreamMessageReader() noexcept(false);
    133 
    134   // implements MessageReader ----------------------------------------
    135   kj::ArrayPtr<const word> getSegment(uint id) override;
    136 
    137 private:
    138   kj::InputStream& inputStream;
    139   byte* readPos;
    140 
    141   // Optimize for single-segment case.
    142   kj::ArrayPtr<const word> segment0;
    143   kj::Array<kj::ArrayPtr<const word>> moreSegments;
    144 
    145   kj::Array<word> ownedSpace;
    146   // Only if scratchSpace wasn't big enough.
    147 
    148   kj::UnwindDetector unwindDetector;
    149 };
    150 
    151 void readMessageCopy(kj::InputStream& input, MessageBuilder& target,
    152                      ReaderOptions options = ReaderOptions(),
    153                      kj::ArrayPtr<word> scratchSpace = nullptr);
    154 // Convenience function which reads a message using `InputStreamMessageReader` then copies the
    155 // content into the target `MessageBuilder`, verifying that the message structure is valid
    156 // (although not necessarily that it matches the desired schema).
    157 //
    158 // (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one
    159 // of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not
    160 // safe to use on untrusted input. Therefore, we do not provide a convenience method for it.)
    161 
    162 void writeMessage(kj::OutputStream& output, MessageBuilder& builder);
    163 // Write the message to the given output stream.
    164 
    165 void writeMessage(kj::OutputStream& output, kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
    166 // Write the segment array to the given output stream.
    167 
    168 // =======================================================================================
    169 // Specializations for reading from / writing to file descriptors.
    170 
    171 class StreamFdMessageReader: private kj::FdInputStream, public InputStreamMessageReader {
    172   // A MessageReader that reads from a stream-based file descriptor.
    173 
    174 public:
    175   StreamFdMessageReader(int fd, ReaderOptions options = ReaderOptions(),
    176                         kj::ArrayPtr<word> scratchSpace = nullptr)
    177       : FdInputStream(fd), InputStreamMessageReader(*this, options, scratchSpace) {}
    178   // Read message from a file descriptor, without taking ownership of the descriptor.
    179 
    180   StreamFdMessageReader(kj::AutoCloseFd fd, ReaderOptions options = ReaderOptions(),
    181                         kj::ArrayPtr<word> scratchSpace = nullptr)
    182       : FdInputStream(kj::mv(fd)), InputStreamMessageReader(*this, options, scratchSpace) {}
    183   // Read a message from a file descriptor, taking ownership of the descriptor.
    184 
    185   ~StreamFdMessageReader() noexcept(false);
    186 };
    187 
    188 void readMessageCopyFromFd(int fd, MessageBuilder& target,
    189                            ReaderOptions options = ReaderOptions(),
    190                            kj::ArrayPtr<word> scratchSpace = nullptr);
    191 // Convenience function which reads a message using `StreamFdMessageReader` then copies the
    192 // content into the target `MessageBuilder`, verifying that the message structure is valid
    193 // (although not necessarily that it matches the desired schema).
    194 //
    195 // (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one
    196 // of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not
    197 // safe to use on untrusted input. Therefore, we do not provide a convenience method for it.)
    198 
    199 void writeMessageToFd(int fd, MessageBuilder& builder);
    200 // Write the message to the given file descriptor.
    201 //
    202 // This function throws an exception on any I/O error.  If your code is not exception-safe, be sure
    203 // you catch this exception at the call site.  If throwing an exception is not acceptable, you
    204 // can implement your own OutputStream with arbitrary error handling and then use writeMessage().
    205 
    206 void writeMessageToFd(int fd, kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
    207 // Write the segment array to the given file descriptor.
    208 //
    209 // This function throws an exception on any I/O error.  If your code is not exception-safe, be sure
    210 // you catch this exception at the call site.  If throwing an exception is not acceptable, you
    211 // can implement your own OutputStream with arbitrary error handling and then use writeMessage().
    212 
    213 // =======================================================================================
    214 // inline stuff
    215 
    216 inline kj::Array<word> messageToFlatArray(MessageBuilder& builder) {
    217   return messageToFlatArray(builder.getSegmentsForOutput());
    218 }
    219 
    220 inline size_t computeSerializedSizeInWords(MessageBuilder& builder) {
    221   return computeSerializedSizeInWords(builder.getSegmentsForOutput());
    222 }
    223 
    224 inline void writeMessage(kj::OutputStream& output, MessageBuilder& builder) {
    225   writeMessage(output, builder.getSegmentsForOutput());
    226 }
    227 
    228 inline void writeMessageToFd(int fd, MessageBuilder& builder) {
    229   writeMessageToFd(fd, builder.getSegmentsForOutput());
    230 }
    231 
    232 }  // namespace capnp
    233 
    234 CAPNP_END_HEADER