schema-parser.c++ (15575B)
// Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors
// Licensed under the MIT License:
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
21 22 #include "schema-parser.h" 23 #include "message.h" 24 #include <capnp/compiler/compiler.h> 25 #include <capnp/compiler/lexer.capnp.h> 26 #include <capnp/compiler/lexer.h> 27 #include <capnp/compiler/grammar.capnp.h> 28 #include <capnp/compiler/parser.h> 29 #include <unordered_map> 30 #include <kj/mutex.h> 31 #include <kj/vector.h> 32 #include <kj/debug.h> 33 #include <kj/io.h> 34 #include <map> 35 36 namespace capnp { 37 38 namespace { 39 40 template <typename T> 41 size_t findLargestElementBefore(const kj::Vector<T>& vec, const T& key) { 42 KJ_REQUIRE(vec.size() > 0 && vec[0] <= key); 43 44 size_t lower = 0; 45 size_t upper = vec.size(); 46 47 while (upper - lower > 1) { 48 size_t mid = (lower + upper) / 2; 49 if (vec[mid] > key) { 50 upper = mid; 51 } else { 52 lower = mid; 53 } 54 } 55 56 return lower; 57 } 58 59 } // namespace 60 61 // ======================================================================================= 62 63 class SchemaParser::ModuleImpl final: public compiler::Module { 64 public: 65 ModuleImpl(const SchemaParser& parser, kj::Own<const SchemaFile>&& file) 66 : parser(parser), file(kj::mv(file)) {} 67 68 kj::StringPtr getSourceName() override { 69 return file->getDisplayName(); 70 } 71 72 Orphan<compiler::ParsedFile> loadContent(Orphanage orphanage) override { 73 kj::Array<const char> content = file->readContent(); 74 75 lineBreaks.get([&](kj::SpaceFor<kj::Vector<uint>>& space) { 76 auto vec = space.construct(content.size() / 40); 77 vec->add(0); 78 for (const char* pos = content.begin(); pos < content.end(); ++pos) { 79 if (*pos == '\n') { 80 vec->add(pos + 1 - content.begin()); 81 } 82 } 83 return vec; 84 }); 85 86 MallocMessageBuilder lexedBuilder; 87 auto statements = lexedBuilder.initRoot<compiler::LexedStatements>(); 88 compiler::lex(content, statements, *this); 89 90 auto parsed = orphanage.newOrphan<compiler::ParsedFile>(); 91 compiler::parseFile(statements.getStatements(), parsed.get(), *this); 92 return parsed; 93 } 94 95 
kj::Maybe<Module&> importRelative(kj::StringPtr importPath) override { 96 KJ_IF_MAYBE(importedFile, file->import(importPath)) { 97 return parser.getModuleImpl(kj::mv(*importedFile)); 98 } else { 99 return nullptr; 100 } 101 } 102 103 kj::Maybe<kj::Array<const byte>> embedRelative(kj::StringPtr embedPath) override { 104 KJ_IF_MAYBE(importedFile, file->import(embedPath)) { 105 return importedFile->get()->readContent().releaseAsBytes(); 106 } else { 107 return nullptr; 108 } 109 } 110 111 void addError(uint32_t startByte, uint32_t endByte, kj::StringPtr message) override { 112 auto& lines = lineBreaks.get( 113 [](kj::SpaceFor<kj::Vector<uint>>& space) { 114 KJ_FAIL_REQUIRE("Can't report errors until loadContent() is called."); 115 return space.construct(); 116 }); 117 118 // TODO(someday): This counts tabs as single characters. Do we care? 119 uint startLine = findLargestElementBefore(lines, startByte); 120 uint startCol = startByte - lines[startLine]; 121 uint endLine = findLargestElementBefore(lines, endByte); 122 uint endCol = endByte - lines[endLine]; 123 124 file->reportError( 125 SchemaFile::SourcePos { startByte, startLine, startCol }, 126 SchemaFile::SourcePos { endByte, endLine, endCol }, 127 message); 128 129 // We intentionally only set hadErrors true if reportError() didn't throw. 130 parser.hadErrors = true; 131 } 132 133 bool hadErrors() override { 134 return parser.hadErrors; 135 } 136 137 private: 138 const SchemaParser& parser; 139 kj::Own<const SchemaFile> file; 140 141 kj::Lazy<kj::Vector<uint>> lineBreaks; 142 // Byte offsets of the first byte in each source line. The first element is always zero. 143 // Initialized the first time the module is loaded. 
144 }; 145 146 // ======================================================================================= 147 148 namespace { 149 150 struct SchemaFileHash { 151 inline bool operator()(const SchemaFile* f) const { 152 return f->hashCode(); 153 } 154 }; 155 156 struct SchemaFileEq { 157 inline bool operator()(const SchemaFile* a, const SchemaFile* b) const { 158 return *a == *b; 159 } 160 }; 161 162 } // namespace 163 164 struct SchemaParser::DiskFileCompat { 165 // Stuff we only create if parseDiskFile() is ever called, in order to translate that call into 166 // KJ filesystem API calls. 167 168 kj::Own<kj::Filesystem> ownFs; 169 kj::Filesystem& fs; 170 171 struct ImportDir { 172 kj::String pathStr; 173 kj::Path path; 174 kj::Own<const kj::ReadableDirectory> dir; 175 }; 176 std::map<kj::StringPtr, ImportDir> cachedImportDirs; 177 178 std::map<std::pair<const kj::StringPtr*, size_t>, kj::Array<const kj::ReadableDirectory*>> 179 cachedImportPaths; 180 181 DiskFileCompat(): ownFs(kj::newDiskFilesystem()), fs(*ownFs) {} 182 DiskFileCompat(kj::Filesystem& fs): fs(fs) {} 183 }; 184 185 struct SchemaParser::Impl { 186 typedef std::unordered_map< 187 const SchemaFile*, kj::Own<ModuleImpl>, SchemaFileHash, SchemaFileEq> FileMap; 188 kj::MutexGuarded<FileMap> fileMap; 189 compiler::Compiler compiler; 190 191 kj::MutexGuarded<kj::Maybe<DiskFileCompat>> compat; 192 }; 193 194 SchemaParser::SchemaParser(): impl(kj::heap<Impl>()) {} 195 SchemaParser::~SchemaParser() noexcept(false) {} 196 197 ParsedSchema SchemaParser::parseFromDirectory( 198 const kj::ReadableDirectory& baseDir, kj::Path path, 199 kj::ArrayPtr<const kj::ReadableDirectory* const> importPath) const { 200 return parseFile(SchemaFile::newFromDirectory(baseDir, kj::mv(path), importPath)); 201 } 202 203 ParsedSchema SchemaParser::parseDiskFile( 204 kj::StringPtr displayName, kj::StringPtr diskPath, 205 kj::ArrayPtr<const kj::StringPtr> importPath) const { 206 auto lock = impl->compat.lockExclusive(); 207 
DiskFileCompat* compat; 208 KJ_IF_MAYBE(c, *lock) { 209 compat = c; 210 } else { 211 compat = &lock->emplace(); 212 } 213 214 auto& root = compat->fs.getRoot(); 215 auto cwd = compat->fs.getCurrentPath(); 216 217 const kj::ReadableDirectory* baseDir = &root; 218 kj::Path path = cwd.evalNative(diskPath); 219 220 kj::ArrayPtr<const kj::ReadableDirectory* const> translatedImportPath = nullptr; 221 222 if (importPath.size() > 0) { 223 auto importPathKey = std::make_pair(importPath.begin(), importPath.size()); 224 auto& slot = compat->cachedImportPaths[importPathKey]; 225 226 if (slot == nullptr) { 227 slot = KJ_MAP(path, importPath) -> const kj::ReadableDirectory* { 228 auto iter = compat->cachedImportDirs.find(path); 229 if (iter != compat->cachedImportDirs.end()) { 230 return iter->second.dir; 231 } 232 233 auto parsed = cwd.evalNative(path); 234 kj::Own<const kj::ReadableDirectory> dir; 235 KJ_IF_MAYBE(d, root.tryOpenSubdir(parsed)) { 236 dir = kj::mv(*d); 237 } else { 238 // Ignore paths that don't exist. 239 dir = kj::newInMemoryDirectory(kj::nullClock()); 240 } 241 242 const kj::ReadableDirectory* result = dir; 243 244 kj::StringPtr pathRef = path; 245 KJ_ASSERT(compat->cachedImportDirs.insert(std::make_pair(pathRef, 246 DiskFileCompat::ImportDir { kj::str(path), kj::mv(parsed), kj::mv(dir) })).second); 247 248 return result; 249 }; 250 } 251 252 translatedImportPath = slot; 253 254 // Check if `path` appears to be inside any of the import path directories. If so, adjust 255 // to be relative to that directory rather than absolute. 256 kj::Maybe<DiskFileCompat::ImportDir&> matchedImportDir; 257 size_t bestMatchLength = 0; 258 for (auto importDir: importPath) { 259 auto iter = compat->cachedImportDirs.find(importDir); 260 KJ_ASSERT(iter != compat->cachedImportDirs.end()); 261 262 if (path.startsWith(iter->second.path)) { 263 // Looks like we're trying to load a file from inside this import path. Treat the import 264 // path as the base directory. 
265 if (iter->second.path.size() > bestMatchLength) { 266 bestMatchLength = iter->second.path.size(); 267 matchedImportDir = iter->second; 268 } 269 } 270 } 271 272 KJ_IF_MAYBE(match, matchedImportDir) { 273 baseDir = match->dir; 274 path = path.slice(match->path.size(), path.size()).clone(); 275 } 276 } 277 278 return parseFile(SchemaFile::newFromDirectory( 279 *baseDir, kj::mv(path), translatedImportPath, kj::str(displayName))); 280 } 281 282 void SchemaParser::setDiskFilesystem(kj::Filesystem& fs) { 283 auto lock = impl->compat.lockExclusive(); 284 KJ_REQUIRE(*lock == nullptr, "already called parseDiskFile() or setDiskFilesystem()"); 285 lock->emplace(fs); 286 } 287 288 ParsedSchema SchemaParser::parseFile(kj::Own<SchemaFile>&& file) const { 289 KJ_DEFER(impl->compiler.clearWorkspace()); 290 uint64_t id = impl->compiler.add(getModuleImpl(kj::mv(file))).getId(); 291 impl->compiler.eagerlyCompile(id, 292 compiler::Compiler::NODE | compiler::Compiler::CHILDREN | 293 compiler::Compiler::DEPENDENCIES | compiler::Compiler::DEPENDENCY_DEPENDENCIES); 294 return ParsedSchema(impl->compiler.getLoader().get(id), *this); 295 } 296 297 kj::Maybe<schema::Node::SourceInfo::Reader> SchemaParser::getSourceInfo(Schema schema) const { 298 return impl->compiler.getSourceInfo(schema.getProto().getId()); 299 } 300 301 SchemaParser::ModuleImpl& SchemaParser::getModuleImpl(kj::Own<SchemaFile>&& file) const { 302 auto lock = impl->fileMap.lockExclusive(); 303 304 auto insertResult = lock->insert(std::make_pair(file.get(), kj::Own<ModuleImpl>())); 305 if (insertResult.second) { 306 // This is a newly-inserted entry. Construct the ModuleImpl. 
307 insertResult.first->second = kj::heap<ModuleImpl>(*this, kj::mv(file)); 308 } 309 return *insertResult.first->second; 310 } 311 312 SchemaLoader& SchemaParser::getLoader() { 313 return impl->compiler.getLoader(); 314 } 315 316 kj::Maybe<ParsedSchema> ParsedSchema::findNested(kj::StringPtr name) const { 317 // TODO(someday): lookup() doesn't handle generics correctly. Use the ModuleScope/CompiledType 318 // interface instead. We can also add an applybrand() method to ParsedSchema using those 319 // interfaces, which would allow us to expose generics more explicitly to e.g. Python. 320 return parser->impl->compiler.lookup(getProto().getId(), name).map( 321 [this](uint64_t childId) { 322 return ParsedSchema(parser->impl->compiler.getLoader().get(childId), *parser); 323 }); 324 } 325 326 ParsedSchema ParsedSchema::getNested(kj::StringPtr nestedName) const { 327 KJ_IF_MAYBE(nested, findNested(nestedName)) { 328 return *nested; 329 } else { 330 KJ_FAIL_REQUIRE("no such nested declaration", getProto().getDisplayName(), nestedName); 331 } 332 } 333 334 ParsedSchema::ParsedSchemaList ParsedSchema::getAllNested() const { 335 return ParsedSchemaList(*this, getProto().getNestedNodes()); 336 } 337 338 schema::Node::SourceInfo::Reader ParsedSchema::getSourceInfo() const { 339 return KJ_ASSERT_NONNULL(parser->getSourceInfo(*this)); 340 } 341 342 // ------------------------------------------------------------------- 343 344 ParsedSchema ParsedSchema::ParsedSchemaList::operator[](uint index) const { 345 return ParsedSchema( 346 parent.parser->impl->compiler.getLoader().get(list[index].getId()), 347 *parent.parser); 348 } 349 350 // ------------------------------------------------------------------- 351 352 class SchemaFile::DiskSchemaFile final: public SchemaFile { 353 public: 354 DiskSchemaFile(const kj::ReadableDirectory& baseDir, kj::Path pathParam, 355 kj::ArrayPtr<const kj::ReadableDirectory* const> importPath, 356 kj::Own<const kj::ReadableFile> file, 357 
kj::Maybe<kj::String> displayNameOverride) 358 : baseDir(baseDir), path(kj::mv(pathParam)), importPath(importPath), file(kj::mv(file)) { 359 KJ_IF_MAYBE(dn, displayNameOverride) { 360 displayName = kj::mv(*dn); 361 displayNameOverridden = true; 362 } else { 363 displayName = path.toString(); 364 displayNameOverridden = false; 365 } 366 } 367 368 kj::StringPtr getDisplayName() const override { 369 return displayName; 370 } 371 372 kj::Array<const char> readContent() const override { 373 return file->mmap(0, file->stat().size).releaseAsChars(); 374 } 375 376 kj::Maybe<kj::Own<SchemaFile>> import(kj::StringPtr target) const override { 377 if (target.startsWith("/")) { 378 auto parsed = kj::Path::parse(target.slice(1)); 379 for (auto candidate: importPath) { 380 KJ_IF_MAYBE(newFile, candidate->tryOpenFile(parsed)) { 381 return kj::implicitCast<kj::Own<SchemaFile>>(kj::heap<DiskSchemaFile>( 382 *candidate, kj::mv(parsed), importPath, kj::mv(*newFile), nullptr)); 383 } 384 } 385 return nullptr; 386 } else { 387 auto parsed = path.parent().eval(target); 388 389 kj::Maybe<kj::String> displayNameOverride; 390 if (displayNameOverridden) { 391 // Try to create a consistent display name override for the imported file. This is for 392 // backwards-compatibility only -- display names are only overridden when using the 393 // deprecated parseDiskFile() interface. 
394 kj::runCatchingExceptions([&]() { 395 displayNameOverride = kj::Path::parse(displayName).parent().eval(target).toString(); 396 }); 397 } 398 399 KJ_IF_MAYBE(newFile, baseDir.tryOpenFile(parsed)) { 400 return kj::implicitCast<kj::Own<SchemaFile>>(kj::heap<DiskSchemaFile>( 401 baseDir, kj::mv(parsed), importPath, kj::mv(*newFile), kj::mv(displayNameOverride))); 402 } else { 403 return nullptr; 404 } 405 } 406 } 407 408 bool operator==(const SchemaFile& other) const override { 409 auto& other2 = kj::downcast<const DiskSchemaFile>(other); 410 return &baseDir == &other2.baseDir && path == other2.path; 411 } 412 bool operator!=(const SchemaFile& other) const override { 413 return !operator==(other); 414 } 415 size_t hashCode() const override { 416 // djb hash with xor 417 // TODO(someday): Add hashing library to KJ. 418 size_t result = reinterpret_cast<uintptr_t>(&baseDir); 419 for (auto& part: path) { 420 for (char c: part) { 421 result = (result * 33) ^ c; 422 } 423 result = (result * 33) ^ '/'; 424 } 425 return result; 426 } 427 428 void reportError(SourcePos start, SourcePos end, kj::StringPtr message) const override { 429 kj::getExceptionCallback().onRecoverableException(kj::Exception( 430 kj::Exception::Type::FAILED, path.toString(), start.line, 431 kj::heapString(message))); 432 } 433 434 private: 435 const kj::ReadableDirectory& baseDir; 436 kj::Path path; 437 kj::ArrayPtr<const kj::ReadableDirectory* const> importPath; 438 kj::Own<const kj::ReadableFile> file; 439 kj::String displayName; 440 bool displayNameOverridden; 441 }; 442 443 kj::Own<SchemaFile> SchemaFile::newFromDirectory( 444 const kj::ReadableDirectory& baseDir, kj::Path path, 445 kj::ArrayPtr<const kj::ReadableDirectory* const> importPath, 446 kj::Maybe<kj::String> displayNameOverride) { 447 return kj::heap<DiskSchemaFile>(baseDir, kj::mv(path), importPath, baseDir.openFile(path), 448 kj::mv(displayNameOverride)); 449 } 450 451 } // namespace capnp