capnproto

FORK: Cap'n Proto serialization/RPC system - core tools and C++ library
git clone https://git.neptards.moe/neptards/capnproto.git
Log | Files | Refs | README | LICENSE

schema-parser.c++ (15575B)


      1 // Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors
      2 // Licensed under the MIT License:
      3 //
      4 // Permission is hereby granted, free of charge, to any person obtaining a copy
      5 // of this software and associated documentation files (the "Software"), to deal
      6 // in the Software without restriction, including without limitation the rights
      7 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      8 // copies of the Software, and to permit persons to whom the Software is
      9 // furnished to do so, subject to the following conditions:
     10 //
     11 // The above copyright notice and this permission notice shall be included in
     12 // all copies or substantial portions of the Software.
     13 //
     14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     17 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     18 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     19 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     20 // THE SOFTWARE.
     21 
     22 #include "schema-parser.h"
     23 #include "message.h"
     24 #include <capnp/compiler/compiler.h>
     25 #include <capnp/compiler/lexer.capnp.h>
     26 #include <capnp/compiler/lexer.h>
     27 #include <capnp/compiler/grammar.capnp.h>
     28 #include <capnp/compiler/parser.h>
     29 #include <unordered_map>
     30 #include <kj/mutex.h>
     31 #include <kj/vector.h>
     32 #include <kj/debug.h>
     33 #include <kj/io.h>
     34 #include <map>
     35 
     36 namespace capnp {
     37 
     38 namespace {
     39 
     40 template <typename T>
     41 size_t findLargestElementBefore(const kj::Vector<T>& vec, const T& key) {
     42   KJ_REQUIRE(vec.size() > 0 && vec[0] <= key);
     43 
     44   size_t lower = 0;
     45   size_t upper = vec.size();
     46 
     47   while (upper - lower > 1) {
     48     size_t mid = (lower + upper) / 2;
     49     if (vec[mid] > key) {
     50       upper = mid;
     51     } else {
     52       lower = mid;
     53     }
     54   }
     55 
     56   return lower;
     57 }
     58 
     59 }  // namespace
     60 
     61 // =======================================================================================
     62 
     63 class SchemaParser::ModuleImpl final: public compiler::Module {
     64 public:
     65   ModuleImpl(const SchemaParser& parser, kj::Own<const SchemaFile>&& file)
     66       : parser(parser), file(kj::mv(file)) {}
     67 
     68   kj::StringPtr getSourceName() override {
     69     return file->getDisplayName();
     70   }
     71 
     72   Orphan<compiler::ParsedFile> loadContent(Orphanage orphanage) override {
     73     kj::Array<const char> content = file->readContent();
     74 
     75     lineBreaks.get([&](kj::SpaceFor<kj::Vector<uint>>& space) {
     76       auto vec = space.construct(content.size() / 40);
     77       vec->add(0);
     78       for (const char* pos = content.begin(); pos < content.end(); ++pos) {
     79         if (*pos == '\n') {
     80           vec->add(pos + 1 - content.begin());
     81         }
     82       }
     83       return vec;
     84     });
     85 
     86     MallocMessageBuilder lexedBuilder;
     87     auto statements = lexedBuilder.initRoot<compiler::LexedStatements>();
     88     compiler::lex(content, statements, *this);
     89 
     90     auto parsed = orphanage.newOrphan<compiler::ParsedFile>();
     91     compiler::parseFile(statements.getStatements(), parsed.get(), *this);
     92     return parsed;
     93   }
     94 
     95   kj::Maybe<Module&> importRelative(kj::StringPtr importPath) override {
     96     KJ_IF_MAYBE(importedFile, file->import(importPath)) {
     97       return parser.getModuleImpl(kj::mv(*importedFile));
     98     } else {
     99       return nullptr;
    100     }
    101   }
    102 
    103   kj::Maybe<kj::Array<const byte>> embedRelative(kj::StringPtr embedPath) override {
    104     KJ_IF_MAYBE(importedFile, file->import(embedPath)) {
    105       return importedFile->get()->readContent().releaseAsBytes();
    106     } else {
    107       return nullptr;
    108     }
    109   }
    110 
    111   void addError(uint32_t startByte, uint32_t endByte, kj::StringPtr message) override {
    112     auto& lines = lineBreaks.get(
    113         [](kj::SpaceFor<kj::Vector<uint>>& space) {
    114           KJ_FAIL_REQUIRE("Can't report errors until loadContent() is called.");
    115           return space.construct();
    116         });
    117 
    118     // TODO(someday):  This counts tabs as single characters.  Do we care?
    119     uint startLine = findLargestElementBefore(lines, startByte);
    120     uint startCol = startByte - lines[startLine];
    121     uint endLine = findLargestElementBefore(lines, endByte);
    122     uint endCol = endByte - lines[endLine];
    123 
    124     file->reportError(
    125         SchemaFile::SourcePos { startByte, startLine, startCol },
    126         SchemaFile::SourcePos { endByte, endLine, endCol },
    127         message);
    128 
    129     // We intentionally only set hadErrors true if reportError() didn't throw.
    130     parser.hadErrors = true;
    131   }
    132 
    133   bool hadErrors() override {
    134     return parser.hadErrors;
    135   }
    136 
    137 private:
    138   const SchemaParser& parser;
    139   kj::Own<const SchemaFile> file;
    140 
    141   kj::Lazy<kj::Vector<uint>> lineBreaks;
    142   // Byte offsets of the first byte in each source line.  The first element is always zero.
    143   // Initialized the first time the module is loaded.
    144 };
    145 
    146 // =======================================================================================
    147 
    148 namespace {
    149 
    150 struct SchemaFileHash {
    151   inline bool operator()(const SchemaFile* f) const {
    152     return f->hashCode();
    153   }
    154 };
    155 
    156 struct SchemaFileEq {
    157   inline bool operator()(const SchemaFile* a, const SchemaFile* b) const {
    158     return *a == *b;
    159   }
    160 };
    161 
    162 }  // namespace
    163 
    164 struct SchemaParser::DiskFileCompat {
    165   // Stuff we only create if parseDiskFile() is ever called, in order to translate that call into
    166   // KJ filesystem API calls.
    167 
    168   kj::Own<kj::Filesystem> ownFs;
    169   kj::Filesystem& fs;
    170 
    171   struct ImportDir {
    172     kj::String pathStr;
    173     kj::Path path;
    174     kj::Own<const kj::ReadableDirectory> dir;
    175   };
    176   std::map<kj::StringPtr, ImportDir> cachedImportDirs;
    177 
    178   std::map<std::pair<const kj::StringPtr*, size_t>, kj::Array<const kj::ReadableDirectory*>>
    179       cachedImportPaths;
    180 
    181   DiskFileCompat(): ownFs(kj::newDiskFilesystem()), fs(*ownFs) {}
    182   DiskFileCompat(kj::Filesystem& fs): fs(fs) {}
    183 };
    184 
    185 struct SchemaParser::Impl {
    186   typedef std::unordered_map<
    187       const SchemaFile*, kj::Own<ModuleImpl>, SchemaFileHash, SchemaFileEq> FileMap;
    188   kj::MutexGuarded<FileMap> fileMap;
    189   compiler::Compiler compiler;
    190 
    191   kj::MutexGuarded<kj::Maybe<DiskFileCompat>> compat;
    192 };
    193 
    194 SchemaParser::SchemaParser(): impl(kj::heap<Impl>()) {}
    195 SchemaParser::~SchemaParser() noexcept(false) {}
    196 
    197 ParsedSchema SchemaParser::parseFromDirectory(
    198     const kj::ReadableDirectory& baseDir, kj::Path path,
    199     kj::ArrayPtr<const kj::ReadableDirectory* const> importPath) const {
    200   return parseFile(SchemaFile::newFromDirectory(baseDir, kj::mv(path), importPath));
    201 }
    202 
    203 ParsedSchema SchemaParser::parseDiskFile(
    204     kj::StringPtr displayName, kj::StringPtr diskPath,
    205     kj::ArrayPtr<const kj::StringPtr> importPath) const {
    206   auto lock = impl->compat.lockExclusive();
    207   DiskFileCompat* compat;
    208   KJ_IF_MAYBE(c, *lock) {
    209     compat = c;
    210   } else {
    211     compat = &lock->emplace();
    212   }
    213 
    214   auto& root = compat->fs.getRoot();
    215   auto cwd = compat->fs.getCurrentPath();
    216 
    217   const kj::ReadableDirectory* baseDir = &root;
    218   kj::Path path = cwd.evalNative(diskPath);
    219 
    220   kj::ArrayPtr<const kj::ReadableDirectory* const> translatedImportPath = nullptr;
    221 
    222   if (importPath.size() > 0) {
    223     auto importPathKey = std::make_pair(importPath.begin(), importPath.size());
    224     auto& slot = compat->cachedImportPaths[importPathKey];
    225 
    226     if (slot == nullptr) {
    227       slot = KJ_MAP(path, importPath) -> const kj::ReadableDirectory* {
    228         auto iter = compat->cachedImportDirs.find(path);
    229         if (iter != compat->cachedImportDirs.end()) {
    230           return iter->second.dir;
    231         }
    232 
    233         auto parsed = cwd.evalNative(path);
    234         kj::Own<const kj::ReadableDirectory> dir;
    235         KJ_IF_MAYBE(d, root.tryOpenSubdir(parsed)) {
    236           dir = kj::mv(*d);
    237         } else {
    238           // Ignore paths that don't exist.
    239           dir = kj::newInMemoryDirectory(kj::nullClock());
    240         }
    241 
    242         const kj::ReadableDirectory* result = dir;
    243 
    244         kj::StringPtr pathRef = path;
    245         KJ_ASSERT(compat->cachedImportDirs.insert(std::make_pair(pathRef,
    246             DiskFileCompat::ImportDir { kj::str(path), kj::mv(parsed), kj::mv(dir) })).second);
    247 
    248         return result;
    249       };
    250     }
    251 
    252     translatedImportPath = slot;
    253 
    254     // Check if `path` appears to be inside any of the import path directories. If so, adjust
    255     // to be relative to that directory rather than absolute.
    256     kj::Maybe<DiskFileCompat::ImportDir&> matchedImportDir;
    257     size_t bestMatchLength = 0;
    258     for (auto importDir: importPath) {
    259       auto iter = compat->cachedImportDirs.find(importDir);
    260       KJ_ASSERT(iter != compat->cachedImportDirs.end());
    261 
    262       if (path.startsWith(iter->second.path)) {
    263         // Looks like we're trying to load a file from inside this import path. Treat the import
    264         // path as the base directory.
    265         if (iter->second.path.size() > bestMatchLength) {
    266           bestMatchLength = iter->second.path.size();
    267           matchedImportDir = iter->second;
    268         }
    269       }
    270     }
    271 
    272     KJ_IF_MAYBE(match, matchedImportDir) {
    273       baseDir = match->dir;
    274       path = path.slice(match->path.size(), path.size()).clone();
    275     }
    276   }
    277 
    278   return parseFile(SchemaFile::newFromDirectory(
    279       *baseDir, kj::mv(path), translatedImportPath, kj::str(displayName)));
    280 }
    281 
    282 void SchemaParser::setDiskFilesystem(kj::Filesystem& fs) {
    283   auto lock = impl->compat.lockExclusive();
    284   KJ_REQUIRE(*lock == nullptr, "already called parseDiskFile() or setDiskFilesystem()");
    285   lock->emplace(fs);
    286 }
    287 
    288 ParsedSchema SchemaParser::parseFile(kj::Own<SchemaFile>&& file) const {
    289   KJ_DEFER(impl->compiler.clearWorkspace());
    290   uint64_t id = impl->compiler.add(getModuleImpl(kj::mv(file))).getId();
    291   impl->compiler.eagerlyCompile(id,
    292       compiler::Compiler::NODE | compiler::Compiler::CHILDREN |
    293       compiler::Compiler::DEPENDENCIES | compiler::Compiler::DEPENDENCY_DEPENDENCIES);
    294   return ParsedSchema(impl->compiler.getLoader().get(id), *this);
    295 }
    296 
    297 kj::Maybe<schema::Node::SourceInfo::Reader> SchemaParser::getSourceInfo(Schema schema) const {
    298   return impl->compiler.getSourceInfo(schema.getProto().getId());
    299 }
    300 
    301 SchemaParser::ModuleImpl& SchemaParser::getModuleImpl(kj::Own<SchemaFile>&& file) const {
    302   auto lock = impl->fileMap.lockExclusive();
    303 
    304   auto insertResult = lock->insert(std::make_pair(file.get(), kj::Own<ModuleImpl>()));
    305   if (insertResult.second) {
    306     // This is a newly-inserted entry.  Construct the ModuleImpl.
    307     insertResult.first->second = kj::heap<ModuleImpl>(*this, kj::mv(file));
    308   }
    309   return *insertResult.first->second;
    310 }
    311 
    312 SchemaLoader& SchemaParser::getLoader() {
    313   return impl->compiler.getLoader();
    314 }
    315 
    316 kj::Maybe<ParsedSchema> ParsedSchema::findNested(kj::StringPtr name) const {
    317   // TODO(someday): lookup() doesn't handle generics correctly. Use the ModuleScope/CompiledType
    318   //   interface instead. We can also add an applybrand() method to ParsedSchema using those
    319   //   interfaces, which would allow us to expose generics more explicitly to e.g. Python.
    320   return parser->impl->compiler.lookup(getProto().getId(), name).map(
    321       [this](uint64_t childId) {
    322         return ParsedSchema(parser->impl->compiler.getLoader().get(childId), *parser);
    323       });
    324 }
    325 
    326 ParsedSchema ParsedSchema::getNested(kj::StringPtr nestedName) const {
    327   KJ_IF_MAYBE(nested, findNested(nestedName)) {
    328     return *nested;
    329   } else {
    330     KJ_FAIL_REQUIRE("no such nested declaration", getProto().getDisplayName(), nestedName);
    331   }
    332 }
    333 
    334 ParsedSchema::ParsedSchemaList ParsedSchema::getAllNested() const {
    335   return ParsedSchemaList(*this, getProto().getNestedNodes());
    336 }
    337 
    338 schema::Node::SourceInfo::Reader ParsedSchema::getSourceInfo() const {
    339   return KJ_ASSERT_NONNULL(parser->getSourceInfo(*this));
    340 }
    341 
    342 // -------------------------------------------------------------------
    343 
    344 ParsedSchema ParsedSchema::ParsedSchemaList::operator[](uint index) const {
    345   return ParsedSchema(
    346     parent.parser->impl->compiler.getLoader().get(list[index].getId()),
    347     *parent.parser);
    348 }
    349 
    350 // -------------------------------------------------------------------
    351 
    352 class SchemaFile::DiskSchemaFile final: public SchemaFile {
    353 public:
    354   DiskSchemaFile(const kj::ReadableDirectory& baseDir, kj::Path pathParam,
    355                  kj::ArrayPtr<const kj::ReadableDirectory* const> importPath,
    356                  kj::Own<const kj::ReadableFile> file,
    357                  kj::Maybe<kj::String> displayNameOverride)
    358       : baseDir(baseDir), path(kj::mv(pathParam)), importPath(importPath), file(kj::mv(file)) {
    359     KJ_IF_MAYBE(dn, displayNameOverride) {
    360       displayName = kj::mv(*dn);
    361       displayNameOverridden = true;
    362     } else {
    363       displayName = path.toString();
    364       displayNameOverridden = false;
    365     }
    366   }
    367 
    368   kj::StringPtr getDisplayName() const override {
    369     return displayName;
    370   }
    371 
    372   kj::Array<const char> readContent() const override {
    373     return file->mmap(0, file->stat().size).releaseAsChars();
    374   }
    375 
    376   kj::Maybe<kj::Own<SchemaFile>> import(kj::StringPtr target) const override {
    377     if (target.startsWith("/")) {
    378       auto parsed = kj::Path::parse(target.slice(1));
    379       for (auto candidate: importPath) {
    380         KJ_IF_MAYBE(newFile, candidate->tryOpenFile(parsed)) {
    381           return kj::implicitCast<kj::Own<SchemaFile>>(kj::heap<DiskSchemaFile>(
    382               *candidate, kj::mv(parsed), importPath, kj::mv(*newFile), nullptr));
    383         }
    384       }
    385       return nullptr;
    386     } else {
    387       auto parsed = path.parent().eval(target);
    388 
    389       kj::Maybe<kj::String> displayNameOverride;
    390       if (displayNameOverridden) {
    391         // Try to create a consistent display name override for the imported file. This is for
    392         // backwards-compatibility only -- display names are only overridden when using the
    393         // deprecated parseDiskFile() interface.
    394         kj::runCatchingExceptions([&]() {
    395           displayNameOverride = kj::Path::parse(displayName).parent().eval(target).toString();
    396         });
    397       }
    398 
    399       KJ_IF_MAYBE(newFile, baseDir.tryOpenFile(parsed)) {
    400         return kj::implicitCast<kj::Own<SchemaFile>>(kj::heap<DiskSchemaFile>(
    401             baseDir, kj::mv(parsed), importPath, kj::mv(*newFile), kj::mv(displayNameOverride)));
    402       } else {
    403         return nullptr;
    404       }
    405     }
    406   }
    407 
    408   bool operator==(const SchemaFile& other) const override {
    409     auto& other2 = kj::downcast<const DiskSchemaFile>(other);
    410     return &baseDir == &other2.baseDir && path == other2.path;
    411   }
    412   bool operator!=(const SchemaFile& other) const override {
    413     return !operator==(other);
    414   }
    415   size_t hashCode() const override {
    416     // djb hash with xor
    417     // TODO(someday):  Add hashing library to KJ.
    418     size_t result = reinterpret_cast<uintptr_t>(&baseDir);
    419     for (auto& part: path) {
    420       for (char c: part) {
    421         result = (result * 33) ^ c;
    422       }
    423       result = (result * 33) ^ '/';
    424     }
    425     return result;
    426   }
    427 
    428   void reportError(SourcePos start, SourcePos end, kj::StringPtr message) const override {
    429     kj::getExceptionCallback().onRecoverableException(kj::Exception(
    430         kj::Exception::Type::FAILED, path.toString(), start.line,
    431         kj::heapString(message)));
    432   }
    433 
    434 private:
    435   const kj::ReadableDirectory& baseDir;
    436   kj::Path path;
    437   kj::ArrayPtr<const kj::ReadableDirectory* const> importPath;
    438   kj::Own<const kj::ReadableFile> file;
    439   kj::String displayName;
    440   bool displayNameOverridden;
    441 };
    442 
    443 kj::Own<SchemaFile> SchemaFile::newFromDirectory(
    444     const kj::ReadableDirectory& baseDir, kj::Path path,
    445     kj::ArrayPtr<const kj::ReadableDirectory* const> importPath,
    446     kj::Maybe<kj::String> displayNameOverride) {
    447   return kj::heap<DiskSchemaFile>(baseDir, kj::mv(path), importPath, baseDir.openFile(path),
    448                                   kj::mv(displayNameOverride));
    449 }
    450 
    451 }  // namespace capnp