lexer-test.c++ (12764B)
1 // Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors 2 // Licensed under the MIT License: 3 // 4 // Permission is hereby granted, free of charge, to any person obtaining a copy 5 // of this software and associated documentation files (the "Software"), to deal 6 // in the Software without restriction, including without limitation the rights 7 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 // copies of the Software, and to permit persons to whom the Software is 9 // furnished to do so, subject to the following conditions: 10 // 11 // The above copyright notice and this permission notice shall be included in 12 // all copies or substantial portions of the Software. 13 // 14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 // THE SOFTWARE. 21 22 #include "lexer.h" 23 #include "../message.h" 24 #include <kj/compat/gtest.h> 25 26 namespace capnp { 27 namespace compiler { 28 namespace { 29 30 class TestFailingErrorReporter: public ErrorReporter { 31 public: 32 void addError(uint32_t startByte, uint32_t endByte, kj::StringPtr message) override { 33 KJ_FAIL_EXPECT("Parse failed.", startByte, endByte, message); 34 } 35 36 bool hadErrors() override { 37 // Not used by lexer. 38 return false; 39 } 40 }; 41 42 template <typename LexResult> 43 kj::String doLex(kj::StringPtr constText) { 44 // Parse the given string into the given Cap'n Proto struct type using lex(), then stringify the 45 // result and return that string. Additionally, single quotes in the input are converted to 46 // double quotes, and double quotes in the output are converted to single quotes, to reduce the 47 // amount of escaping needed in the test strings. 48 // 49 // Comparing stringifications against golden strings is ugly and brittle. If we had a 50 // text-format parser we could use that. Except that said parser would probably be built on 51 // the very lexer being tested here, so... maybe this is the best we can reasonably do. 52 53 kj::String text = heapString(constText); 54 for (char& c: text) { 55 // Make it easier to write input strings below. 56 if (c == '\'') c = '\"'; 57 } 58 MallocMessageBuilder message; 59 auto file = message.initRoot<LexResult>(); 60 TestFailingErrorReporter errorReporter; 61 EXPECT_TRUE(lex(text, file, errorReporter)); 62 kj::String result = kj::str(file); 63 for (char& c: result) { 64 // Make it easier to write golden strings below. 65 if (c == '\"') c = '\''; 66 } 67 return result; 68 } 69 70 TEST(Lexer, Tokens) { 71 EXPECT_STREQ( 72 "(tokens = [" 73 "(identifier = 'foo', startByte = 0, endByte = 3), " 74 "(identifier = 'bar', startByte = 4, endByte = 7)" 75 "])", 76 doLex<LexedTokens>("foo bar").cStr()); 77 78 EXPECT_STREQ( 79 "(tokens = [" 80 "(identifier = 'foo', startByte = 0, endByte = 3), " 81 "(identifier = 'bar', startByte = 15, endByte = 18)" 82 "])", 83 doLex<LexedTokens>("foo # comment\n bar").cStr()); 84 85 EXPECT_STREQ( 86 "(tokens = [" 87 "(stringLiteral = 'foo ', startByte = 2, endByte = 11), " 88 "(integerLiteral = 123, startByte = 12, endByte = 15), " 89 "(floatLiteral = 2.75, startByte = 16, endByte = 20), " 90 "(floatLiteral = 60000, startByte = 21, endByte = 24), " 91 "(operator = '+', startByte = 25, endByte = 26), " 92 "(operator = '-=', startByte = 27, endByte = 29)" 93 "])", 94 doLex<LexedTokens>(" 'foo\\x20' 123 2.75 6e4 + -= ").cStr()); 95 96 EXPECT_STREQ( 97 "(tokens = [" 98 "(parenthesizedList = [" 99 "[" 100 "(identifier = 'foo', startByte = 1, endByte = 4), " 101 "(identifier = 'bar', startByte = 5, endByte = 8)" 102 "], [" 103 "(identifier = 'baz', startByte = 10, endByte = 13), " 104 "(identifier = 'qux', startByte = 14, endByte = 17)" 105 "], [" 106 "(identifier = 'corge', startByte = 19, endByte = 24), " 107 "(identifier = 'grault', startByte = 25, endByte = 31)" 108 "]" 109 "], startByte = 0, endByte = 32)" 110 "])", 111 doLex<LexedTokens>("(foo bar, baz qux, corge grault)").cStr()); 112 113 EXPECT_STREQ( 114 "(tokens = [" 115 "(parenthesizedList = [" 116 "[" 117 "(identifier = 'foo', startByte = 1, endByte = 4), " 118 "(identifier = 'bar', startByte = 5, endByte = 8)" 119 "]" 120 "], startByte = 0, endByte = 9)" 121 "])", 122 doLex<LexedTokens>("(foo bar)").cStr()); 123 124 // Empty parentheses should result in an empty list-of-lists, *not* a list containing an empty 125 // list. 126 EXPECT_STREQ( 127 "(tokens = [" 128 "(parenthesizedList = [], startByte = 0, endByte = 4)" 129 "])", 130 doLex<LexedTokens>("( )").cStr()); 131 132 EXPECT_STREQ( 133 "(tokens = [" 134 "(bracketedList = [" 135 "[" 136 "(identifier = 'foo', startByte = 1, endByte = 4), " 137 "(identifier = 'bar', startByte = 5, endByte = 8)" 138 "], [" 139 "(identifier = 'baz', startByte = 10, endByte = 13), " 140 "(identifier = 'qux', startByte = 14, endByte = 17)" 141 "], [" 142 "(identifier = 'corge', startByte = 19, endByte = 24), " 143 "(identifier = 'grault', startByte = 25, endByte = 31)" 144 "]" 145 "], startByte = 0, endByte = 32)" 146 "])", 147 doLex<LexedTokens>("[foo bar, baz qux, corge grault]").cStr()); 148 149 // Trailing commas should not create an empty final list item, but be stripped by the lexer. 150 EXPECT_STREQ( 151 "(tokens = [" 152 "(bracketedList = [" 153 "[" 154 "(identifier = 'foo', startByte = 1, endByte = 4)" 155 "], [" 156 "(identifier = 'bar', startByte = 6, endByte = 9)" 157 "]" 158 "], startByte = 0, endByte = 11)" 159 "])", 160 doLex<LexedTokens>("[foo, bar,]").cStr()); 161 162 EXPECT_STREQ( 163 "(tokens = [" 164 "(bracketedList = [" 165 "[" 166 "(identifier = 'foo', startByte = 1, endByte = 4)" 167 "], [" 168 "(parenthesizedList = [" 169 "[" 170 "(identifier = 'bar', startByte = 7, endByte = 10)" 171 "], [" 172 "(identifier = 'baz', startByte = 12, endByte = 15)" 173 "]" 174 "], startByte = 6, endByte = 16)" 175 "]" 176 "], startByte = 0, endByte = 17), " 177 "(identifier = 'qux', startByte = 18, endByte = 21)" 178 "])", 179 doLex<LexedTokens>("[foo, (bar, baz)] qux").cStr()); 180 181 EXPECT_STREQ( 182 "(tokens = [" 183 "(identifier = 'foo', startByte = 0, endByte = 3), " 184 "(identifier = 'bar', startByte = 7, endByte = 10)" 185 "])", 186 doLex<LexedTokens>("foo\n\r\t\vbar").cStr()); 187 } 188 189 TEST(Lexer, Statements) { 190 EXPECT_STREQ( 191 "(statements = [" 192 "(tokens = [" 193 "(identifier = 'foo', startByte = 0, endByte = 3), " 194 "(identifier = 'bar', startByte = 4, endByte = 7)" 195 "], line = void, startByte = 0, endByte = 8)" 196 "])", 197 doLex<LexedStatements>("foo bar;").cStr()); 198 199 EXPECT_STREQ( 200 "(statements = [" 201 "(tokens = [" 202 "(identifier = 'foo', startByte = 0, endByte = 3)" 203 "], line = void, startByte = 0, endByte = 4), " 204 "(tokens = [" 205 "(identifier = 'bar', startByte = 5, endByte = 8)" 206 "], line = void, startByte = 5, endByte = 9), " 207 "(tokens = [" 208 "(identifier = 'baz', startByte = 10, endByte = 13)" 209 "], line = void, startByte = 10, endByte = 14)" 210 "])", 211 doLex<LexedStatements>("foo; bar; baz; ").cStr()); 212 213 EXPECT_STREQ( 214 "(statements = [" 215 "(" 216 "tokens = [" 217 "(identifier = 'foo', startByte = 0, endByte = 3)" 218 "], " 219 "block = [" 220 "(tokens = [" 221 "(identifier = 'bar', startByte = 5, endByte = 8)" 222 "], line = void, startByte = 5, endByte = 9), " 223 "(tokens = [" 224 "(identifier = 'baz', startByte = 10, endByte = 13)" 225 "], line = void, startByte = 10, endByte = 14)" 226 "], " 227 "startByte = 0, endByte = 15" 228 "), " 229 "(tokens = [" 230 "(identifier = 'qux', startByte = 16, endByte = 19)" 231 "], line = void, startByte = 16, endByte = 20)" 232 "])", 233 doLex<LexedStatements>("foo {bar; baz;} qux;").cStr()); 234 } 235 236 TEST(Lexer, DocComments) { 237 EXPECT_STREQ( 238 "(statements = [" 239 "(" 240 "tokens = [" 241 "(identifier = 'foo', startByte = 0, endByte = 3)" 242 "], " 243 "line = void, " 244 "docComment = 'blah blah\\n', " 245 "startByte = 0, endByte = 16" 246 ")" 247 "])", 248 doLex<LexedStatements>("foo; # blah blah").cStr()); 249 250 EXPECT_STREQ( 251 "(statements = [" 252 "(" 253 "tokens = [" 254 "(identifier = 'foo', startByte = 0, endByte = 3)" 255 "], " 256 "line = void, " 257 "docComment = 'blah blah\\n', " 258 "startByte = 0, endByte = 15" 259 ")" 260 "])", 261 doLex<LexedStatements>("foo; #blah blah").cStr()); 262 263 EXPECT_STREQ( 264 "(statements = [" 265 "(" 266 "tokens = [" 267 "(identifier = 'foo', startByte = 0, endByte = 3)" 268 "], " 269 "line = void, " 270 "docComment = ' blah blah\\n', " 271 "startByte = 0, endByte = 17" 272 ")" 273 "])", 274 doLex<LexedStatements>("foo; # blah blah").cStr()); 275 276 EXPECT_STREQ( 277 "(statements = [" 278 "(" 279 "tokens = [" 280 "(identifier = 'foo', startByte = 0, endByte = 3)" 281 "], " 282 "line = void, " 283 "docComment = 'blah blah\\n', " 284 "startByte = 0, endByte = 16" 285 ")" 286 "])", 287 doLex<LexedStatements>("foo;\n# blah blah").cStr()); 288 289 EXPECT_STREQ( 290 "(statements = [" 291 "(" 292 "tokens = [" 293 "(identifier = 'foo', startByte = 0, endByte = 3)" 294 "], " 295 "line = void, " 296 "startByte = 0, endByte = 4" 297 ")" 298 "])", 299 doLex<LexedStatements>("foo;\n\n# blah blah").cStr()); 300 301 EXPECT_STREQ( 302 "(statements = [" 303 "(" 304 "tokens = [" 305 "(identifier = 'foo', startByte = 0, endByte = 3)" 306 "], " 307 "line = void, " 308 "docComment = 'bar baz\\nqux corge\\n', " 309 "startByte = 0, endByte = 30" 310 ")" 311 "])", 312 doLex<LexedStatements>("foo;\n # bar baz\n # qux corge\n\n# grault\n# garply").cStr()); 313 314 EXPECT_STREQ( 315 "(statements = [" 316 "(" 317 "tokens = [" 318 "(identifier = 'foo', startByte = 0, endByte = 3)" 319 "], " 320 "block = [" 321 "(tokens = [" 322 "(identifier = 'bar', startByte = 17, endByte = 20)" 323 "], line = void, docComment = 'hi\\n', startByte = 17, endByte = 27), " 324 "(tokens = [" 325 "(identifier = 'baz', startByte = 28, endByte = 31)" 326 "], line = void, startByte = 28, endByte = 32)" 327 "], " 328 "docComment = 'blah blah\\n', " 329 "startByte = 0, endByte = 44" 330 "), " 331 "(tokens = [" 332 "(identifier = 'qux', startByte = 44, endByte = 47)" 333 "], line = void, startByte = 44, endByte = 48)" 334 "])", 335 doLex<LexedStatements>("foo {# blah blah\nbar; # hi\n baz;} # ignored\nqux;").cStr()); 336 337 EXPECT_STREQ( 338 "(statements = [" 339 "(" 340 "tokens = [" 341 "(identifier = 'foo', startByte = 0, endByte = 3)" 342 "], " 343 "block = [" 344 "(tokens = [" 345 "(identifier = 'bar', startByte = 5, endByte = 8)" 346 "], line = void, startByte = 5, endByte = 9), " 347 "(tokens = [" 348 "(identifier = 'baz', startByte = 10, endByte = 13)" 349 "], line = void, startByte = 10, endByte = 14)" 350 "], " 351 "docComment = 'late comment\\n', " 352 "startByte = 0, endByte = 31" 353 "), " 354 "(tokens = [" 355 "(identifier = 'qux', startByte = 31, endByte = 34)" 356 "], line = void, startByte = 31, endByte = 35)" 357 "])", 358 doLex<LexedStatements>("foo {bar; baz;}\n# late comment\nqux;").cStr()); 359 } 360 361 TEST(Lexer, Utf8Bom) { 362 EXPECT_STREQ( 363 "(tokens = [" 364 "(identifier = 'foo', startByte = 3, endByte = 6), " 365 "(identifier = 'bar', startByte = 7, endByte = 10), " 366 "(identifier = 'baz', startByte = 13, endByte = 16)" 367 "])", 368 doLex<LexedTokens>("\xef\xbb\xbf""foo bar\xef\xbb\xbf""baz").cStr()); 369 } 370 371 } // namespace 372 } // namespace compiler 373 } // namespace capnp