yaml-cpp

FORK: A YAML parser and emitter in C++
git clone https://git.neptards.moe/neptards/yaml-cpp.git
Log | Files | Refs | README | LICENSE

scanner.cpp (9007B)


      1 #include <cassert>
      2 #include <memory>
      3 
      4 #include "exp.h"
      5 #include "scanner.h"
      6 #include "token.h"
      7 #include "yaml-cpp/exceptions.h"  // IWYU pragma: keep
      8 
      9 namespace YAML {
     10 Scanner::Scanner(std::istream& in)
     11     : INPUT(in),
     12       m_tokens{},
     13       m_startedStream(false),
     14       m_endedStream(false),
     15       m_simpleKeyAllowed(false),
     16       m_canBeJSONFlow(false),
     17       m_simpleKeys{},
     18       m_indents{},
     19       m_indentRefs{},
     20       m_flows{} {}
     21 
// Defaulted destructor, defined here in the source file rather than in the
// class declaration; no manual cleanup is performed.
Scanner::~Scanner() = default;
     23 
     24 bool Scanner::empty() {
     25   EnsureTokensInQueue();
     26   return m_tokens.empty();
     27 }
     28 
     29 void Scanner::pop() {
     30   EnsureTokensInQueue();
     31   if (!m_tokens.empty())
     32     m_tokens.pop();
     33 }
     34 
     35 Token& Scanner::peek() {
     36   EnsureTokensInQueue();
     37   assert(!m_tokens.empty());  // should we be asserting here? I mean, we really
     38                               // just be checking
     39                               // if it's empty before peeking.
     40 
     41 #if 0
     42 		static Token *pLast = 0;
     43 		if(pLast != &m_tokens.front())
     44 			std::cerr << "peek: " << m_tokens.front() << "\n";
     45 		pLast = &m_tokens.front();
     46 #endif
     47 
     48   return m_tokens.front();
     49 }
     50 
     51 Mark Scanner::mark() const { return INPUT.mark(); }
     52 
     53 void Scanner::EnsureTokensInQueue() {
     54   while (true) {
     55     if (!m_tokens.empty()) {
     56       Token& token = m_tokens.front();
     57 
     58       // if this guy's valid, then we're done
     59       if (token.status == Token::VALID) {
     60         return;
     61       }
     62 
     63       // here's where we clean up the impossible tokens
     64       if (token.status == Token::INVALID) {
     65         m_tokens.pop();
     66         continue;
     67       }
     68 
     69       // note: what's left are the unverified tokens
     70     }
     71 
     72     // no token? maybe we've actually finished
     73     if (m_endedStream) {
     74       return;
     75     }
     76 
     77     // no? then scan...
     78     ScanNextToken();
     79   }
     80 }
     81 
     82 void Scanner::ScanNextToken() {
     83   if (m_endedStream) {
     84     return;
     85   }
     86 
     87   if (!m_startedStream) {
     88     return StartStream();
     89   }
     90 
     91   // get rid of whitespace, etc. (in between tokens it should be irrelevant)
     92   ScanToNextToken();
     93 
     94   // maybe need to end some blocks
     95   PopIndentToHere();
     96 
     97   // *****
     98   // And now branch based on the next few characters!
     99   // *****
    100 
    101   // end of stream
    102   if (!INPUT) {
    103     return EndStream();
    104   }
    105 
    106   if (INPUT.column() == 0 && INPUT.peek() == Keys::Directive) {
    107     return ScanDirective();
    108   }
    109 
    110   // document token
    111   if (INPUT.column() == 0 && Exp::DocStart().Matches(INPUT)) {
    112     return ScanDocStart();
    113   }
    114 
    115   if (INPUT.column() == 0 && Exp::DocEnd().Matches(INPUT)) {
    116     return ScanDocEnd();
    117   }
    118 
    119   // flow start/end/entry
    120   if (INPUT.peek() == Keys::FlowSeqStart ||
    121       INPUT.peek() == Keys::FlowMapStart) {
    122     return ScanFlowStart();
    123   }
    124 
    125   if (INPUT.peek() == Keys::FlowSeqEnd || INPUT.peek() == Keys::FlowMapEnd) {
    126     return ScanFlowEnd();
    127   }
    128 
    129   if (INPUT.peek() == Keys::FlowEntry) {
    130     return ScanFlowEntry();
    131   }
    132 
    133   // block/map stuff
    134   if (Exp::BlockEntry().Matches(INPUT)) {
    135     return ScanBlockEntry();
    136   }
    137 
    138   if ((InBlockContext() ? Exp::Key() : Exp::KeyInFlow()).Matches(INPUT)) {
    139     return ScanKey();
    140   }
    141 
    142   if (GetValueRegex().Matches(INPUT)) {
    143     return ScanValue();
    144   }
    145 
    146   // alias/anchor
    147   if (INPUT.peek() == Keys::Alias || INPUT.peek() == Keys::Anchor) {
    148     return ScanAnchorOrAlias();
    149   }
    150 
    151   // tag
    152   if (INPUT.peek() == Keys::Tag) {
    153     return ScanTag();
    154   }
    155 
    156   // special scalars
    157   if (InBlockContext() && (INPUT.peek() == Keys::LiteralScalar ||
    158                            INPUT.peek() == Keys::FoldedScalar)) {
    159     return ScanBlockScalar();
    160   }
    161 
    162   if (INPUT.peek() == '\'' || INPUT.peek() == '\"') {
    163     return ScanQuotedScalar();
    164   }
    165 
    166   // plain scalars
    167   if ((InBlockContext() ? Exp::PlainScalar() : Exp::PlainScalarInFlow())
    168           .Matches(INPUT)) {
    169     return ScanPlainScalar();
    170   }
    171 
    172   // don't know what it is!
    173   throw ParserException(INPUT.mark(), ErrorMsg::UNKNOWN_TOKEN);
    174 }
    175 
    176 void Scanner::ScanToNextToken() {
    177   while (true) {
    178     // first eat whitespace
    179     while (INPUT && IsWhitespaceToBeEaten(INPUT.peek())) {
    180       if (InBlockContext() && Exp::Tab().Matches(INPUT)) {
    181         m_simpleKeyAllowed = false;
    182       }
    183       INPUT.eat(1);
    184     }
    185 
    186     // then eat a comment
    187     if (Exp::Comment().Matches(INPUT)) {
    188       // eat until line break
    189       while (INPUT && !Exp::Break().Matches(INPUT)) {
    190         INPUT.eat(1);
    191       }
    192     }
    193 
    194     // if it's NOT a line break, then we're done!
    195     if (!Exp::Break().Matches(INPUT)) {
    196       break;
    197     }
    198 
    199     // otherwise, let's eat the line break and keep going
    200     int n = Exp::Break().Match(INPUT);
    201     INPUT.eat(n);
    202 
    203     // oh yeah, and let's get rid of that simple key
    204     InvalidateSimpleKey();
    205 
    206     // new line - we may be able to accept a simple key now
    207     if (InBlockContext()) {
    208       m_simpleKeyAllowed = true;
    209     }
    210   }
    211 }
    212 
    213 ///////////////////////////////////////////////////////////////////////
    214 // Misc. helpers
    215 
    216 // IsWhitespaceToBeEaten
    217 // . We can eat whitespace if it's a space or tab
    218 // . Note: originally tabs in block context couldn't be eaten
    219 //         "where a simple key could be allowed
    220 //         (i.e., not at the beginning of a line, or following '-', '?', or
    221 // ':')"
    222 //   I think this is wrong, since tabs can be non-content whitespace; it's just
    223 //   that they can't contribute to indentation, so once you've seen a tab in a
    224 //   line, you can't start a simple key
    225 bool Scanner::IsWhitespaceToBeEaten(char ch) {
    226   if (ch == ' ') {
    227     return true;
    228   }
    229 
    230   if (ch == '\t') {
    231     return true;
    232   }
    233 
    234   return false;
    235 }
    236 
    237 const RegEx& Scanner::GetValueRegex() const {
    238   if (InBlockContext()) {
    239     return Exp::Value();
    240   }
    241 
    242   return m_canBeJSONFlow ? Exp::ValueInJSONFlow() : Exp::ValueInFlow();
    243 }
    244 
    245 void Scanner::StartStream() {
    246   m_startedStream = true;
    247   m_simpleKeyAllowed = true;
    248   std::unique_ptr<IndentMarker> pIndent(
    249       new IndentMarker(-1, IndentMarker::NONE));
    250   m_indentRefs.push_back(std::move(pIndent));
    251   m_indents.push(&m_indentRefs.back());
    252 }
    253 
    254 void Scanner::EndStream() {
    255   // force newline
    256   if (INPUT.column() > 0) {
    257     INPUT.ResetColumn();
    258   }
    259 
    260   PopAllIndents();
    261   PopAllSimpleKeys();
    262 
    263   m_simpleKeyAllowed = false;
    264   m_endedStream = true;
    265 }
    266 
    267 Token* Scanner::PushToken(Token::TYPE type) {
    268   m_tokens.push(Token(type, INPUT.mark()));
    269   return &m_tokens.back();
    270 }
    271 
    272 Token::TYPE Scanner::GetStartTokenFor(IndentMarker::INDENT_TYPE type) const {
    273   switch (type) {
    274     case IndentMarker::SEQ:
    275       return Token::BLOCK_SEQ_START;
    276     case IndentMarker::MAP:
    277       return Token::BLOCK_MAP_START;
    278     case IndentMarker::NONE:
    279       assert(false);
    280       break;
    281   }
    282   assert(false);
    283   throw std::runtime_error("yaml-cpp: internal error, invalid indent type");
    284 }
    285 
    286 Scanner::IndentMarker* Scanner::PushIndentTo(int column,
    287                                              IndentMarker::INDENT_TYPE type) {
    288   // are we in flow?
    289   if (InFlowContext()) {
    290     return nullptr;
    291   }
    292 
    293   std::unique_ptr<IndentMarker> pIndent(new IndentMarker(column, type));
    294   IndentMarker& indent = *pIndent;
    295   const IndentMarker& lastIndent = *m_indents.top();
    296 
    297   // is this actually an indentation?
    298   if (indent.column < lastIndent.column) {
    299     return nullptr;
    300   }
    301   if (indent.column == lastIndent.column &&
    302       !(indent.type == IndentMarker::SEQ &&
    303         lastIndent.type == IndentMarker::MAP)) {
    304     return nullptr;
    305   }
    306 
    307   // push a start token
    308   indent.pStartToken = PushToken(GetStartTokenFor(type));
    309 
    310   // and then the indent
    311   m_indents.push(&indent);
    312   m_indentRefs.push_back(std::move(pIndent));
    313   return &m_indentRefs.back();
    314 }
    315 
    316 void Scanner::PopIndentToHere() {
    317   // are we in flow?
    318   if (InFlowContext()) {
    319     return;
    320   }
    321 
    322   // now pop away
    323   while (!m_indents.empty()) {
    324     const IndentMarker& indent = *m_indents.top();
    325     if (indent.column < INPUT.column()) {
    326       break;
    327     }
    328     if (indent.column == INPUT.column() &&
    329         !(indent.type == IndentMarker::SEQ &&
    330           !Exp::BlockEntry().Matches(INPUT))) {
    331       break;
    332     }
    333 
    334     PopIndent();
    335   }
    336 
    337   while (!m_indents.empty() &&
    338          m_indents.top()->status == IndentMarker::INVALID) {
    339     PopIndent();
    340   }
    341 }
    342 
    343 void Scanner::PopAllIndents() {
    344   // are we in flow?
    345   if (InFlowContext()) {
    346     return;
    347   }
    348 
    349   // now pop away
    350   while (!m_indents.empty()) {
    351     const IndentMarker& indent = *m_indents.top();
    352     if (indent.type == IndentMarker::NONE) {
    353       break;
    354     }
    355 
    356     PopIndent();
    357   }
    358 }
    359 
    360 void Scanner::PopIndent() {
    361   const IndentMarker& indent = *m_indents.top();
    362   m_indents.pop();
    363 
    364   if (indent.status != IndentMarker::VALID) {
    365     InvalidateSimpleKey();
    366     return;
    367   }
    368 
    369   if (indent.type == IndentMarker::SEQ) {
    370     m_tokens.push(Token(Token::BLOCK_SEQ_END, INPUT.mark()));
    371   } else if (indent.type == IndentMarker::MAP) {
    372     m_tokens.push(Token(Token::BLOCK_MAP_END, INPUT.mark()));
    373   }
    374 }
    375 
    376 int Scanner::GetTopIndent() const {
    377   if (m_indents.empty()) {
    378     return 0;
    379   }
    380   return m_indents.top()->column;
    381 }
    382 
    383 void Scanner::ThrowParserException(const std::string& msg) const {
    384   Mark mark = Mark::null_mark();
    385   if (!m_tokens.empty()) {
    386     const Token& token = m_tokens.front();
    387     mark = token.mark;
    388   }
    389   throw ParserException(mark, msg);
    390 }
    391 }  // namespace YAML