capnproto

FORK: Cap'n Proto serialization/RPC system - core tools and C++ library
git clone https://git.neptards.moe/neptards/capnproto.git

lexer-test.c++ (12764B)


// Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors
// Licensed under the MIT License:
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#include "lexer.h"
#include "../message.h"
#include <kj/compat/gtest.h>

namespace capnp {
namespace compiler {
namespace {

class TestFailingErrorReporter: public ErrorReporter {
public:
  void addError(uint32_t startByte, uint32_t endByte, kj::StringPtr message) override {
    KJ_FAIL_EXPECT("Parse failed.", startByte, endByte, message);
  }

  bool hadErrors() override {
    // Not used by lexer.
    return false;
  }
};
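
// A complementary reporter, sketched here for illustration (it is not used by
// the tests in this file, and "ErrorCountingReporter" is a hypothetical name,
// not part of the Cap'n Proto API): a test that expects lexing to report
// errors could count them instead of failing immediately.
class ErrorCountingReporter: public ErrorReporter {
public:
  void addError(uint32_t startByte, uint32_t endByte, kj::StringPtr message) override {
    // Record the error instead of aborting the test.
    ++errorCount;
  }

  bool hadErrors() override {
    return errorCount > 0;
  }

  uint errorCount = 0;
};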

template <typename LexResult>
kj::String doLex(kj::StringPtr constText) {
  // Parse the given string into the given Cap'n Proto struct type using lex(), then stringify the
  // result and return that string.  Additionally, single quotes in the input are converted to
  // double quotes, and double quotes in the output are converted to single quotes, to reduce the
  // amount of escaping needed in the test strings.
  //
  // Comparing stringifications against golden strings is ugly and brittle.  If we had a
  // text-format parser we could use that.  Except that said parser would probably be built on
  // the very lexer being tested here, so...  maybe this is the best we can reasonably do.

  kj::String text = heapString(constText);
  for (char& c: text) {
    // Make it easier to write input strings below.
    if (c == '\'') c = '\"';
  }
  MallocMessageBuilder message;
  auto file = message.initRoot<LexResult>();
  TestFailingErrorReporter errorReporter;
  EXPECT_TRUE(lex(text, file, errorReporter));
  kj::String result = kj::str(file);
  for (char& c: result) {
    // Make it easier to write golden strings below.
    if (c == '\"') c = '\'';
  }
  return result;
}
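
// A minimal usage sketch for doLex(), added for illustration (not part of the
// original suite; the golden string follows the stringification format the
// tests below rely on): a lone identifier lexes to a single token spanning
// its bytes.
TEST(Lexer, DoLexUsageSketch) {
  EXPECT_STREQ(
      "(tokens = [(identifier = 'foo', startByte = 0, endByte = 3)])",
      doLex<LexedTokens>("foo").cStr());
}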

TEST(Lexer, Tokens) {
  EXPECT_STREQ(
      "(tokens = ["
        "(identifier = 'foo', startByte = 0, endByte = 3), "
        "(identifier = 'bar', startByte = 4, endByte = 7)"
      "])",
      doLex<LexedTokens>("foo bar").cStr());

  EXPECT_STREQ(
      "(tokens = ["
        "(identifier = 'foo', startByte = 0, endByte = 3), "
        "(identifier = 'bar', startByte = 15, endByte = 18)"
      "])",
      doLex<LexedTokens>("foo # comment\n bar").cStr());

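  // Literal handling: the escape '\x20' decodes to a single space inside the
  // string literal, and the float input 6e4 stringifies back as 60000.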
  EXPECT_STREQ(
      "(tokens = ["
        "(stringLiteral = 'foo ', startByte = 2, endByte = 11), "
        "(integerLiteral = 123, startByte = 12, endByte = 15), "
        "(floatLiteral = 2.75, startByte = 16, endByte = 20), "
        "(floatLiteral = 60000, startByte = 21, endByte = 24), "
        "(operator = '+', startByte = 25, endByte = 26), "
        "(operator = '-=', startByte = 27, endByte = 29)"
      "])",
      doLex<LexedTokens>("  'foo\\x20' 123 2.75 6e4 + -=  ").cStr());

  EXPECT_STREQ(
      "(tokens = ["
        "(parenthesizedList = ["
          "["
            "(identifier = 'foo', startByte = 1, endByte = 4), "
            "(identifier = 'bar', startByte = 5, endByte = 8)"
          "], ["
            "(identifier = 'baz', startByte = 10, endByte = 13), "
            "(identifier = 'qux', startByte = 14, endByte = 17)"
          "], ["
            "(identifier = 'corge', startByte = 19, endByte = 24), "
            "(identifier = 'grault', startByte = 25, endByte = 31)"
          "]"
        "], startByte = 0, endByte = 32)"
      "])",
      doLex<LexedTokens>("(foo bar, baz qux, corge grault)").cStr());

  EXPECT_STREQ(
      "(tokens = ["
        "(parenthesizedList = ["
          "["
            "(identifier = 'foo', startByte = 1, endByte = 4), "
            "(identifier = 'bar', startByte = 5, endByte = 8)"
          "]"
        "], startByte = 0, endByte = 9)"
      "])",
      doLex<LexedTokens>("(foo bar)").cStr());

  // Empty parentheses should result in an empty list-of-lists, *not* a list containing an empty
  // list.
  EXPECT_STREQ(
      "(tokens = ["
        "(parenthesizedList = [], startByte = 0, endByte = 4)"
      "])",
      doLex<LexedTokens>("(  )").cStr());

  EXPECT_STREQ(
      "(tokens = ["
        "(bracketedList = ["
          "["
            "(identifier = 'foo', startByte = 1, endByte = 4), "
            "(identifier = 'bar', startByte = 5, endByte = 8)"
          "], ["
            "(identifier = 'baz', startByte = 10, endByte = 13), "
            "(identifier = 'qux', startByte = 14, endByte = 17)"
          "], ["
            "(identifier = 'corge', startByte = 19, endByte = 24), "
            "(identifier = 'grault', startByte = 25, endByte = 31)"
          "]"
        "], startByte = 0, endByte = 32)"
      "])",
      doLex<LexedTokens>("[foo bar, baz qux, corge grault]").cStr());

  // A trailing comma should not create an empty final list item; the lexer should simply strip it.
  EXPECT_STREQ(
      "(tokens = ["
        "(bracketedList = ["
          "["
            "(identifier = 'foo', startByte = 1, endByte = 4)"
          "], ["
            "(identifier = 'bar', startByte = 6, endByte = 9)"
          "]"
        "], startByte = 0, endByte = 11)"
      "])",
      doLex<LexedTokens>("[foo, bar,]").cStr());

  EXPECT_STREQ(
      "(tokens = ["
        "(bracketedList = ["
          "["
            "(identifier = 'foo', startByte = 1, endByte = 4)"
          "], ["
            "(parenthesizedList = ["
              "["
                "(identifier = 'bar', startByte = 7, endByte = 10)"
              "], ["
                "(identifier = 'baz', startByte = 12, endByte = 15)"
              "]"
            "], startByte = 6, endByte = 16)"
          "]"
        "], startByte = 0, endByte = 17), "
        "(identifier = 'qux', startByte = 18, endByte = 21)"
      "])",
      doLex<LexedTokens>("[foo, (bar, baz)] qux").cStr());

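  // '\n', '\r', '\t', and '\v' all count as ordinary whitespace between tokens.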
  EXPECT_STREQ(
      "(tokens = ["
        "(identifier = 'foo', startByte = 0, endByte = 3), "
        "(identifier = 'bar', startByte = 7, endByte = 10)"
      "])",
      doLex<LexedTokens>("foo\n\r\t\vbar").cStr());
}

TEST(Lexer, Statements) {
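  // A statement ends either with ';' (stringified as line = void) or with a
  // braced block (stringified as block = [...]); both forms appear below.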
  EXPECT_STREQ(
      "(statements = ["
        "(tokens = ["
          "(identifier = 'foo', startByte = 0, endByte = 3), "
          "(identifier = 'bar', startByte = 4, endByte = 7)"
        "], line = void, startByte = 0, endByte = 8)"
      "])",
      doLex<LexedStatements>("foo bar;").cStr());

  EXPECT_STREQ(
      "(statements = ["
        "(tokens = ["
          "(identifier = 'foo', startByte = 0, endByte = 3)"
        "], line = void, startByte = 0, endByte = 4), "
        "(tokens = ["
          "(identifier = 'bar', startByte = 5, endByte = 8)"
        "], line = void, startByte = 5, endByte = 9), "
        "(tokens = ["
          "(identifier = 'baz', startByte = 10, endByte = 13)"
        "], line = void, startByte = 10, endByte = 14)"
      "])",
      doLex<LexedStatements>("foo; bar; baz; ").cStr());

  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', startByte = 0, endByte = 3)"
          "], "
          "block = ["
            "(tokens = ["
              "(identifier = 'bar', startByte = 5, endByte = 8)"
            "], line = void, startByte = 5, endByte = 9), "
            "(tokens = ["
              "(identifier = 'baz', startByte = 10, endByte = 13)"
            "], line = void, startByte = 10, endByte = 14)"
          "], "
          "startByte = 0, endByte = 15"
        "), "
        "(tokens = ["
          "(identifier = 'qux', startByte = 16, endByte = 19)"
        "], line = void, startByte = 16, endByte = 20)"
      "])",
      doLex<LexedStatements>("foo {bar; baz;} qux;").cStr());
}

TEST(Lexer, DocComments) {
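  // Summary of the behavior exercised below: a '#' comment attaches to the
  // preceding statement as its docComment if it starts on the same line or the
  // immediately following line; a blank line in between detaches it. One space
  // after '#' is stripped (further spaces are kept), and consecutive comment
  // lines are concatenated until a blank line ends the doc comment.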
  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', startByte = 0, endByte = 3)"
          "], "
          "line = void, "
          "docComment = 'blah blah\\n', "
          "startByte = 0, endByte = 16"
        ")"
      "])",
      doLex<LexedStatements>("foo; # blah blah").cStr());

  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', startByte = 0, endByte = 3)"
          "], "
          "line = void, "
          "docComment = 'blah blah\\n', "
          "startByte = 0, endByte = 15"
        ")"
      "])",
      doLex<LexedStatements>("foo; #blah blah").cStr());

  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', startByte = 0, endByte = 3)"
          "], "
          "line = void, "
          "docComment = ' blah blah\\n', "
          "startByte = 0, endByte = 17"
        ")"
      "])",
      doLex<LexedStatements>("foo; #  blah blah").cStr());

  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', startByte = 0, endByte = 3)"
          "], "
          "line = void, "
          "docComment = 'blah blah\\n', "
          "startByte = 0, endByte = 16"
        ")"
      "])",
      doLex<LexedStatements>("foo;\n# blah blah").cStr());

  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', startByte = 0, endByte = 3)"
          "], "
          "line = void, "
          "startByte = 0, endByte = 4"
        ")"
      "])",
      doLex<LexedStatements>("foo;\n\n# blah blah").cStr());

  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', startByte = 0, endByte = 3)"
          "], "
          "line = void, "
          "docComment = 'bar baz\\nqux corge\\n', "
          "startByte = 0, endByte = 30"
        ")"
      "])",
      doLex<LexedStatements>("foo;\n # bar baz\n  # qux corge\n\n# grault\n# garply").cStr());

  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', startByte = 0, endByte = 3)"
          "], "
          "block = ["
            "(tokens = ["
              "(identifier = 'bar', startByte = 17, endByte = 20)"
            "], line = void, docComment = 'hi\\n', startByte = 17, endByte = 27), "
            "(tokens = ["
              "(identifier = 'baz', startByte = 28, endByte = 31)"
            "], line = void, startByte = 28, endByte = 32)"
          "], "
          "docComment = 'blah blah\\n', "
          "startByte = 0, endByte = 44"
        "), "
        "(tokens = ["
          "(identifier = 'qux', startByte = 44, endByte = 47)"
        "], line = void, startByte = 44, endByte = 48)"
      "])",
      doLex<LexedStatements>("foo {# blah blah\nbar; # hi\n baz;} # ignored\nqux;").cStr());

  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', startByte = 0, endByte = 3)"
          "], "
          "block = ["
            "(tokens = ["
              "(identifier = 'bar', startByte = 5, endByte = 8)"
            "], line = void, startByte = 5, endByte = 9), "
            "(tokens = ["
              "(identifier = 'baz', startByte = 10, endByte = 13)"
            "], line = void, startByte = 10, endByte = 14)"
          "], "
          "docComment = 'late comment\\n', "
          "startByte = 0, endByte = 31"
        "), "
        "(tokens = ["
          "(identifier = 'qux', startByte = 31, endByte = 34)"
        "], line = void, startByte = 31, endByte = 35)"
      "])",
      doLex<LexedStatements>("foo {bar; baz;}\n# late comment\nqux;").cStr());
}

TEST(Lexer, Utf8Bom) {
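  // The UTF-8 byte order mark (three bytes, EF BB BF) is skipped like
  // whitespace: 'foo' starts at byte 3, and the interior BOM between 'bar'
  // (ending at byte 10) and 'baz' (starting at byte 13) is likewise skipped.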
  EXPECT_STREQ(
      "(tokens = ["
        "(identifier = 'foo', startByte = 3, endByte = 6), "
        "(identifier = 'bar', startByte = 7, endByte = 10), "
        "(identifier = 'baz', startByte = 13, endByte = 16)"
      "])",
      doLex<LexedTokens>("\xef\xbb\xbf""foo bar\xef\xbb\xbf""baz").cStr());
}

}  // namespace
}  // namespace compiler
}  // namespace capnp