capnproto

FORK: Cap'n Proto serialization/RPC system - core tools and C++ library
git clone https://git.neptards.moe/neptards/capnproto.git
Log | Files | Refs | README | LICENSE

lexer.c++ (12086B)


      1 // Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors
      2 // Licensed under the MIT License:
      3 //
      4 // Permission is hereby granted, free of charge, to any person obtaining a copy
      5 // of this software and associated documentation files (the "Software"), to deal
      6 // in the Software without restriction, including without limitation the rights
      7 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      8 // copies of the Software, and to permit persons to whom the Software is
      9 // furnished to do so, subject to the following conditions:
     10 //
     11 // The above copyright notice and this permission notice shall be included in
     12 // all copies or substantial portions of the Software.
     13 //
     14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     17 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     18 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     19 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     20 // THE SOFTWARE.
     21 
     22 #include "lexer.h"
     23 #include <kj/parse/char.h>
     24 #include <kj/debug.h>
     25 
     26 namespace capnp {
     27 namespace compiler {
     28 
     29 namespace p = kj::parse;
     30 
     31 bool lex(kj::ArrayPtr<const char> input, LexedStatements::Builder result,
     32          ErrorReporter& errorReporter) {
     33   Lexer lexer(Orphanage::getForMessageContaining(result), errorReporter);
     34 
     35   auto parser = p::sequence(lexer.getParsers().statementSequence, p::endOfInput);
     36 
     37   Lexer::ParserInput parserInput(input.begin(), input.end());
     38   kj::Maybe<kj::Array<Orphan<Statement>>> parseOutput = parser(parserInput);
     39 
     40   KJ_IF_MAYBE(output, parseOutput) {
     41     auto l = result.initStatements(output->size());
     42     for (uint i = 0; i < output->size(); i++) {
     43       l.adoptWithCaveats(i, kj::mv((*output)[i]));
     44     }
     45     return true;
     46   } else {
     47     uint32_t best = parserInput.getBest();
     48     errorReporter.addError(best, best, kj::str("Parse error."));
     49     return false;
     50   }
     51 }
     52 
     53 bool lex(kj::ArrayPtr<const char> input, LexedTokens::Builder result,
     54          ErrorReporter& errorReporter) {
     55   Lexer lexer(Orphanage::getForMessageContaining(result), errorReporter);
     56 
     57   auto parser = p::sequence(lexer.getParsers().tokenSequence, p::endOfInput);
     58 
     59   Lexer::ParserInput parserInput(input.begin(), input.end());
     60   kj::Maybe<kj::Array<Orphan<Token>>> parseOutput = parser(parserInput);
     61 
     62   KJ_IF_MAYBE(output, parseOutput) {
     63     auto l = result.initTokens(output->size());
     64     for (uint i = 0; i < output->size(); i++) {
     65       l.adoptWithCaveats(i, kj::mv((*output)[i]));
     66     }
     67     return true;
     68   } else {
     69     uint32_t best = parserInput.getBest();
     70     errorReporter.addError(best, best, kj::str("Parse error."));
     71     return false;
     72   }
     73 }
     74 
     75 namespace {
     76 
     77 typedef p::Span<uint32_t> Location;
     78 
     79 Token::Builder initTok(Orphan<Token>& t, const Location& loc) {
     80   auto builder = t.get();
     81   builder.setStartByte(loc.begin());
     82   builder.setEndByte(loc.end());
     83   return builder;
     84 }
     85 
     86 void buildTokenSequenceList(List<List<Token>>::Builder builder,
     87                             kj::Array<kj::Array<Orphan<Token>>>&& items) {
     88   for (uint i = 0; i < items.size(); i++) {
     89     auto& item = items[i];
     90     auto itemBuilder = builder.init(i, item.size());
     91     for (uint j = 0; j < item.size(); j++) {
     92       itemBuilder.adoptWithCaveats(j, kj::mv(item[j]));
     93     }
     94   }
     95 }
     96 
     97 void attachDocComment(Statement::Builder statement, kj::Array<kj::String>&& comment) {
     98   size_t size = 0;
     99   for (auto& line: comment) {
    100     size += line.size() + 1;  // include newline
    101   }
    102   Text::Builder builder = statement.initDocComment(size);
    103   char* pos = builder.begin();
    104   for (auto& line: comment) {
    105     memcpy(pos, line.begin(), line.size());
    106     pos += line.size();
    107     *pos++ = '\n';
    108   }
    109   KJ_ASSERT(pos == builder.end());
    110 }
    111 
    112 constexpr auto discardComment =
    113     sequence(p::exactChar<'#'>(), p::discard(p::many(p::discard(p::anyOfChars("\n").invert()))),
    114              p::oneOf(p::exactChar<'\n'>(), p::endOfInput));
    115 constexpr auto saveComment =
    116     sequence(p::exactChar<'#'>(), p::discard(p::optional(p::exactChar<' '>())),
    117              p::charsToString(p::many(p::anyOfChars("\n").invert())),
    118              p::oneOf(p::exactChar<'\n'>(), p::endOfInput));
    119 
    120 constexpr auto utf8Bom =
    121     sequence(p::exactChar<'\xef'>(), p::exactChar<'\xbb'>(), p::exactChar<'\xbf'>());
    122 
    123 constexpr auto bomsAndWhitespace =
    124     sequence(p::discardWhitespace,
    125              p::discard(p::many(sequence(utf8Bom, p::discardWhitespace))));
    126 
    127 constexpr auto commentsAndWhitespace =
    128     sequence(bomsAndWhitespace,
    129              p::discard(p::many(sequence(discardComment, bomsAndWhitespace))));
    130 
    131 constexpr auto discardLineWhitespace =
    132     p::discard(p::many(p::discard(p::whitespaceChar.invert().orAny("\r\n").invert())));
    133 constexpr auto newline = p::oneOf(
    134     p::exactChar<'\n'>(),
    135     sequence(p::exactChar<'\r'>(), p::discard(p::optional(p::exactChar<'\n'>()))));
    136 
    137 constexpr auto docComment = p::optional(p::sequence(
    138     discardLineWhitespace,
    139     p::discard(p::optional(newline)),
    140     p::oneOrMore(p::sequence(discardLineWhitespace, saveComment))));
    141 // Parses a set of comment lines preceded by at most one newline and with no intervening blank
    142 // lines.
    143 
    144 }  // namespace
    145 
    146 Lexer::Lexer(Orphanage orphanageParam, ErrorReporter& errorReporter)
    147     : orphanage(orphanageParam) {
    148 
    149   // Note that because passing an lvalue to a parser constructor uses it by-referencee, it's safe
    150   // for us to use parsers.tokenSequence even though we haven't yet constructed it.
    151   auto& tokenSequence = parsers.tokenSequence;
    152 
    153   auto& commaDelimitedList = arena.copy(p::transform(
    154       p::sequence(tokenSequence, p::many(p::sequence(p::exactChar<','>(), tokenSequence))),
    155       [](kj::Array<Orphan<Token>>&& first, kj::Array<kj::Array<Orphan<Token>>>&& rest)
    156           -> kj::Array<kj::Array<Orphan<Token>>> {
    157         if (first == nullptr && rest == nullptr) {
    158           // Completely empty list.
    159           return nullptr;
    160         } else {
    161           uint restSize = rest.size();
    162           if (restSize > 0 && rest[restSize - 1] == nullptr) {
    163             // Allow for trailing commas by shortening the list by one item if the final token is
    164             // nullptr
    165             restSize--;
    166           }
    167           auto result = kj::heapArrayBuilder<kj::Array<Orphan<Token>>>(1 + restSize); // first+rest
    168           result.add(kj::mv(first));
    169           for (uint i = 0; i < restSize ; i++) {
    170             result.add(kj::mv(rest[i]));
    171           }
    172           return result.finish();
    173         }
    174       }));
    175 
    176   auto& token = arena.copy(p::oneOf(
    177       p::transformWithLocation(p::identifier,
    178           [this](Location loc, kj::String name) -> Orphan<Token> {
    179             auto t = orphanage.newOrphan<Token>();
    180             initTok(t, loc).setIdentifier(name);
    181             return t;
    182           }),
    183       p::transformWithLocation(p::doubleQuotedString,
    184           [this](Location loc, kj::String text) -> Orphan<Token> {
    185             auto t = orphanage.newOrphan<Token>();
    186             initTok(t, loc).setStringLiteral(text);
    187             return t;
    188           }),
    189       p::transformWithLocation(p::doubleQuotedHexBinary,
    190           [this](Location loc, kj::Array<byte> data) -> Orphan<Token> {
    191             auto t = orphanage.newOrphan<Token>();
    192             initTok(t, loc).setBinaryLiteral(data);
    193             return t;
    194           }),
    195       p::transformWithLocation(p::integer,
    196           [this](Location loc, uint64_t i) -> Orphan<Token> {
    197             auto t = orphanage.newOrphan<Token>();
    198             initTok(t, loc).setIntegerLiteral(i);
    199             return t;
    200           }),
    201       p::transformWithLocation(p::number,
    202           [this](Location loc, double x) -> Orphan<Token> {
    203             auto t = orphanage.newOrphan<Token>();
    204             initTok(t, loc).setFloatLiteral(x);
    205             return t;
    206           }),
    207       p::transformWithLocation(
    208           p::charsToString(p::oneOrMore(p::anyOfChars("!$%&*+-./:<=>?@^|~"))),
    209           [this](Location loc, kj::String text) -> Orphan<Token> {
    210             auto t = orphanage.newOrphan<Token>();
    211             initTok(t, loc).setOperator(text);
    212             return t;
    213           }),
    214       p::transformWithLocation(
    215           sequence(p::exactChar<'('>(), commaDelimitedList, p::exactChar<')'>()),
    216           [this](Location loc, kj::Array<kj::Array<Orphan<Token>>>&& items) -> Orphan<Token> {
    217             auto t = orphanage.newOrphan<Token>();
    218             buildTokenSequenceList(
    219                 initTok(t, loc).initParenthesizedList(items.size()), kj::mv(items));
    220             return t;
    221           }),
    222       p::transformWithLocation(
    223           sequence(p::exactChar<'['>(), commaDelimitedList, p::exactChar<']'>()),
    224           [this](Location loc, kj::Array<kj::Array<Orphan<Token>>>&& items) -> Orphan<Token> {
    225             auto t = orphanage.newOrphan<Token>();
    226             buildTokenSequenceList(
    227                 initTok(t, loc).initBracketedList(items.size()), kj::mv(items));
    228             return t;
    229           }),
    230       p::transformOrReject(p::transformWithLocation(
    231           p::oneOf(sequence(p::exactChar<'\xff'>(), p::exactChar<'\xfe'>()),
    232                    sequence(p::exactChar<'\xfe'>(), p::exactChar<'\xff'>()),
    233                    sequence(p::exactChar<'\x00'>())),
    234           [&errorReporter](Location loc) -> kj::Maybe<Orphan<Token>> {
    235             errorReporter.addError(loc.begin(), loc.end(),
    236                 "Non-UTF-8 input detected. Cap'n Proto schema files must be UTF-8 text.");
    237             return nullptr;
    238           }), [](kj::Maybe<Orphan<Token>> param) { return param; })));
    239   parsers.tokenSequence = arena.copy(p::sequence(
    240       commentsAndWhitespace, p::many(p::sequence(token, commentsAndWhitespace))));
    241 
    242   auto& statementSequence = parsers.statementSequence;
    243 
    244   auto& statementEnd = arena.copy(p::oneOf(
    245       transform(p::sequence(p::exactChar<';'>(), docComment),
    246           [this](kj::Maybe<kj::Array<kj::String>>&& comment) -> Orphan<Statement> {
    247             auto result = orphanage.newOrphan<Statement>();
    248             auto builder = result.get();
    249             KJ_IF_MAYBE(c, comment) {
    250               attachDocComment(builder, kj::mv(*c));
    251             }
    252             builder.setLine();
    253             return result;
    254           }),
    255       transform(
    256           p::sequence(p::exactChar<'{'>(), docComment, statementSequence, p::exactChar<'}'>(),
    257                       docComment),
    258           [this](kj::Maybe<kj::Array<kj::String>>&& comment,
    259                  kj::Array<Orphan<Statement>>&& statements,
    260                  kj::Maybe<kj::Array<kj::String>>&& lateComment)
    261               -> Orphan<Statement> {
    262             auto result = orphanage.newOrphan<Statement>();
    263             auto builder = result.get();
    264             KJ_IF_MAYBE(c, comment) {
    265               attachDocComment(builder, kj::mv(*c));
    266             } else KJ_IF_MAYBE(c, lateComment) {
    267               attachDocComment(builder, kj::mv(*c));
    268             }
    269             auto list = builder.initBlock(statements.size());
    270             for (uint i = 0; i < statements.size(); i++) {
    271               list.adoptWithCaveats(i, kj::mv(statements[i]));
    272             }
    273             return result;
    274           })
    275       ));
    276 
    277   auto& statement = arena.copy(p::transformWithLocation(p::sequence(tokenSequence, statementEnd),
    278       [](Location loc, kj::Array<Orphan<Token>>&& tokens, Orphan<Statement>&& statement) {
    279         auto builder = statement.get();
    280         auto tokensBuilder = builder.initTokens(tokens.size());
    281         for (uint i = 0; i < tokens.size(); i++) {
    282           tokensBuilder.adoptWithCaveats(i, kj::mv(tokens[i]));
    283         }
    284         builder.setStartByte(loc.begin());
    285         builder.setEndByte(loc.end());
    286         return kj::mv(statement);
    287       }));
    288 
    289   parsers.statementSequence = arena.copy(sequence(
    290       commentsAndWhitespace, many(sequence(statement, commentsAndWhitespace))));
    291 
    292   parsers.token = token;
    293   parsers.statement = statement;
    294   parsers.emptySpace = commentsAndWhitespace;
    295 }
    296 
// Out-of-line destructor with an empty body; members are cleaned up by their own destructors.
// Declared noexcept(false), so exceptions are permitted to propagate out of it.
// NOTE(review): presumably this follows the KJ convention of allowing throwing destructors --
// confirm.
Lexer::~Lexer() noexcept(false) {}
    298 
    299 }  // namespace compiler
    300 }  // namespace capnp