yaml-cpp

FORK: A YAML parser and emitter in C++
git clone https://git.neptards.moe/neptards/yaml-cpp.git
Log | Files | Refs | README | LICENSE

scantoken.cpp (11451B)


      1 #include <sstream>
      2 
      3 #include "exp.h"
      4 #include "regex_yaml.h"
      5 #include "regeximpl.h"
      6 #include "scanner.h"
      7 #include "scanscalar.h"
      8 #include "scantag.h"  // IWYU pragma: keep
      9 #include "tag.h"      // IWYU pragma: keep
     10 #include "token.h"
     11 #include "yaml-cpp/exceptions.h"  // IWYU pragma: keep
     12 #include "yaml-cpp/mark.h"
     13 
     14 namespace YAML {
     15 ///////////////////////////////////////////////////////////////////////
     16 // Specialization for scanning specific tokens
     17 
     18 // Directive
     19 // . Note: no semantic checking is done here (that's for the parser to do)
     20 void Scanner::ScanDirective() {
     21   std::string name;
     22   std::vector<std::string> params;
     23 
     24   // pop indents and simple keys
     25   PopAllIndents();
     26   PopAllSimpleKeys();
     27 
     28   m_simpleKeyAllowed = false;
     29   m_canBeJSONFlow = false;
     30 
     31   // store pos and eat indicator
     32   Token token(Token::DIRECTIVE, INPUT.mark());
     33   INPUT.eat(1);
     34 
     35   // read name
     36   while (INPUT && !Exp::BlankOrBreak().Matches(INPUT))
     37     token.value += INPUT.get();
     38 
     39   // read parameters
     40   while (true) {
     41     // first get rid of whitespace
     42     while (Exp::Blank().Matches(INPUT))
     43       INPUT.eat(1);
     44 
     45     // break on newline or comment
     46     if (!INPUT || Exp::Break().Matches(INPUT) || Exp::Comment().Matches(INPUT))
     47       break;
     48 
     49     // now read parameter
     50     std::string param;
     51     while (INPUT && !Exp::BlankOrBreak().Matches(INPUT))
     52       param += INPUT.get();
     53 
     54     token.params.push_back(param);
     55   }
     56 
     57   m_tokens.push(token);
     58 }
     59 
     60 // DocStart
     61 void Scanner::ScanDocStart() {
     62   PopAllIndents();
     63   PopAllSimpleKeys();
     64   m_simpleKeyAllowed = false;
     65   m_canBeJSONFlow = false;
     66 
     67   // eat
     68   Mark mark = INPUT.mark();
     69   INPUT.eat(3);
     70   m_tokens.push(Token(Token::DOC_START, mark));
     71 }
     72 
     73 // DocEnd
     74 void Scanner::ScanDocEnd() {
     75   PopAllIndents();
     76   PopAllSimpleKeys();
     77   m_simpleKeyAllowed = false;
     78   m_canBeJSONFlow = false;
     79 
     80   // eat
     81   Mark mark = INPUT.mark();
     82   INPUT.eat(3);
     83   m_tokens.push(Token(Token::DOC_END, mark));
     84 }
     85 
     86 // FlowStart
     87 void Scanner::ScanFlowStart() {
     88   // flows can be simple keys
     89   InsertPotentialSimpleKey();
     90   m_simpleKeyAllowed = true;
     91   m_canBeJSONFlow = false;
     92 
     93   // eat
     94   Mark mark = INPUT.mark();
     95   char ch = INPUT.get();
     96   FLOW_MARKER flowType = (ch == Keys::FlowSeqStart ? FLOW_SEQ : FLOW_MAP);
     97   m_flows.push(flowType);
     98   Token::TYPE type =
     99       (flowType == FLOW_SEQ ? Token::FLOW_SEQ_START : Token::FLOW_MAP_START);
    100   m_tokens.push(Token(type, mark));
    101 }
    102 
    103 // FlowEnd
    104 void Scanner::ScanFlowEnd() {
    105   if (InBlockContext())
    106     throw ParserException(INPUT.mark(), ErrorMsg::FLOW_END);
    107 
    108   // we might have a solo entry in the flow context
    109   if (InFlowContext()) {
    110     if (m_flows.top() == FLOW_MAP && VerifySimpleKey())
    111       m_tokens.push(Token(Token::VALUE, INPUT.mark()));
    112     else if (m_flows.top() == FLOW_SEQ)
    113       InvalidateSimpleKey();
    114   }
    115 
    116   m_simpleKeyAllowed = false;
    117   m_canBeJSONFlow = true;
    118 
    119   // eat
    120   Mark mark = INPUT.mark();
    121   char ch = INPUT.get();
    122 
    123   // check that it matches the start
    124   FLOW_MARKER flowType = (ch == Keys::FlowSeqEnd ? FLOW_SEQ : FLOW_MAP);
    125   if (m_flows.top() != flowType)
    126     throw ParserException(mark, ErrorMsg::FLOW_END);
    127   m_flows.pop();
    128 
    129   Token::TYPE type = (flowType ? Token::FLOW_SEQ_END : Token::FLOW_MAP_END);
    130   m_tokens.push(Token(type, mark));
    131 }
    132 
    133 // FlowEntry
    134 void Scanner::ScanFlowEntry() {
    135   // we might have a solo entry in the flow context
    136   if (InFlowContext()) {
    137     if (m_flows.top() == FLOW_MAP && VerifySimpleKey())
    138       m_tokens.push(Token(Token::VALUE, INPUT.mark()));
    139     else if (m_flows.top() == FLOW_SEQ)
    140       InvalidateSimpleKey();
    141   }
    142 
    143   m_simpleKeyAllowed = true;
    144   m_canBeJSONFlow = false;
    145 
    146   // eat
    147   Mark mark = INPUT.mark();
    148   INPUT.eat(1);
    149   m_tokens.push(Token(Token::FLOW_ENTRY, mark));
    150 }
    151 
    152 // BlockEntry
    153 void Scanner::ScanBlockEntry() {
    154   // we better be in the block context!
    155   if (InFlowContext())
    156     throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY);
    157 
    158   // can we put it here?
    159   if (!m_simpleKeyAllowed)
    160     throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY);
    161 
    162   PushIndentTo(INPUT.column(), IndentMarker::SEQ);
    163   m_simpleKeyAllowed = true;
    164   m_canBeJSONFlow = false;
    165 
    166   // eat
    167   Mark mark = INPUT.mark();
    168   INPUT.eat(1);
    169   m_tokens.push(Token(Token::BLOCK_ENTRY, mark));
    170 }
    171 
    172 // Key
    173 void Scanner::ScanKey() {
    174   // handle keys differently in the block context (and manage indents)
    175   if (InBlockContext()) {
    176     if (!m_simpleKeyAllowed)
    177       throw ParserException(INPUT.mark(), ErrorMsg::MAP_KEY);
    178 
    179     PushIndentTo(INPUT.column(), IndentMarker::MAP);
    180   }
    181 
    182   // can only put a simple key here if we're in block context
    183   m_simpleKeyAllowed = InBlockContext();
    184 
    185   // eat
    186   Mark mark = INPUT.mark();
    187   INPUT.eat(1);
    188   m_tokens.push(Token(Token::KEY, mark));
    189 }
    190 
    191 // Value
    192 void Scanner::ScanValue() {
    193   // and check that simple key
    194   bool isSimpleKey = VerifySimpleKey();
    195   m_canBeJSONFlow = false;
    196 
    197   if (isSimpleKey) {
    198     // can't follow a simple key with another simple key (dunno why, though - it
    199     // seems fine)
    200     m_simpleKeyAllowed = false;
    201   } else {
    202     // handle values differently in the block context (and manage indents)
    203     if (InBlockContext()) {
    204       if (!m_simpleKeyAllowed)
    205         throw ParserException(INPUT.mark(), ErrorMsg::MAP_VALUE);
    206 
    207       PushIndentTo(INPUT.column(), IndentMarker::MAP);
    208     }
    209 
    210     // can only put a simple key here if we're in block context
    211     m_simpleKeyAllowed = InBlockContext();
    212   }
    213 
    214   // eat
    215   Mark mark = INPUT.mark();
    216   INPUT.eat(1);
    217   m_tokens.push(Token(Token::VALUE, mark));
    218 }
    219 
    220 // AnchorOrAlias
    221 void Scanner::ScanAnchorOrAlias() {
    222   bool alias;
    223   std::string name;
    224 
    225   // insert a potential simple key
    226   InsertPotentialSimpleKey();
    227   m_simpleKeyAllowed = false;
    228   m_canBeJSONFlow = false;
    229 
    230   // eat the indicator
    231   Mark mark = INPUT.mark();
    232   char indicator = INPUT.get();
    233   alias = (indicator == Keys::Alias);
    234 
    235   // now eat the content
    236   while (INPUT && Exp::Anchor().Matches(INPUT))
    237     name += INPUT.get();
    238 
    239   // we need to have read SOMETHING!
    240   if (name.empty())
    241     throw ParserException(INPUT.mark(), alias ? ErrorMsg::ALIAS_NOT_FOUND
    242                                               : ErrorMsg::ANCHOR_NOT_FOUND);
    243 
    244   // and needs to end correctly
    245   if (INPUT && !Exp::AnchorEnd().Matches(INPUT))
    246     throw ParserException(INPUT.mark(), alias ? ErrorMsg::CHAR_IN_ALIAS
    247                                               : ErrorMsg::CHAR_IN_ANCHOR);
    248 
    249   // and we're done
    250   Token token(alias ? Token::ALIAS : Token::ANCHOR, mark);
    251   token.value = name;
    252   m_tokens.push(token);
    253 }
    254 
    255 // Tag
    256 void Scanner::ScanTag() {
    257   // insert a potential simple key
    258   InsertPotentialSimpleKey();
    259   m_simpleKeyAllowed = false;
    260   m_canBeJSONFlow = false;
    261 
    262   Token token(Token::TAG, INPUT.mark());
    263 
    264   // eat the indicator
    265   INPUT.get();
    266 
    267   if (INPUT && INPUT.peek() == Keys::VerbatimTagStart) {
    268     std::string tag = ScanVerbatimTag(INPUT);
    269 
    270     token.value = tag;
    271     token.data = Tag::VERBATIM;
    272   } else {
    273     bool canBeHandle;
    274     token.value = ScanTagHandle(INPUT, canBeHandle);
    275     if (!canBeHandle && token.value.empty())
    276       token.data = Tag::NON_SPECIFIC;
    277     else if (token.value.empty())
    278       token.data = Tag::SECONDARY_HANDLE;
    279     else
    280       token.data = Tag::PRIMARY_HANDLE;
    281 
    282     // is there a suffix?
    283     if (canBeHandle && INPUT.peek() == Keys::Tag) {
    284       // eat the indicator
    285       INPUT.get();
    286       token.params.push_back(ScanTagSuffix(INPUT));
    287       token.data = Tag::NAMED_HANDLE;
    288     }
    289   }
    290 
    291   m_tokens.push(token);
    292 }
    293 
    294 // PlainScalar
    295 void Scanner::ScanPlainScalar() {
    296   std::string scalar;
    297 
    298   // set up the scanning parameters
    299   ScanScalarParams params;
    300   params.end =
    301       (InFlowContext() ? &Exp::ScanScalarEndInFlow() : &Exp::ScanScalarEnd());
    302   params.eatEnd = false;
    303   params.indent = (InFlowContext() ? 0 : GetTopIndent() + 1);
    304   params.fold = FOLD_FLOW;
    305   params.eatLeadingWhitespace = true;
    306   params.trimTrailingSpaces = true;
    307   params.chomp = STRIP;
    308   params.onDocIndicator = BREAK;
    309   params.onTabInIndentation = THROW;
    310 
    311   // insert a potential simple key
    312   InsertPotentialSimpleKey();
    313 
    314   Mark mark = INPUT.mark();
    315   scalar = ScanScalar(INPUT, params);
    316 
    317   // can have a simple key only if we ended the scalar by starting a new line
    318   m_simpleKeyAllowed = params.leadingSpaces;
    319   m_canBeJSONFlow = false;
    320 
    321   // finally, check and see if we ended on an illegal character
    322   // if(Exp::IllegalCharInScalar.Matches(INPUT))
    323   //	throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_SCALAR);
    324 
    325   Token token(Token::PLAIN_SCALAR, mark);
    326   token.value = scalar;
    327   m_tokens.push(token);
    328 }
    329 
    330 // QuotedScalar
    331 void Scanner::ScanQuotedScalar() {
    332   std::string scalar;
    333 
    334   // peek at single or double quote (don't eat because we need to preserve (for
    335   // the time being) the input position)
    336   char quote = INPUT.peek();
    337   bool single = (quote == '\'');
    338 
    339   // setup the scanning parameters
    340   ScanScalarParams params;
    341   RegEx end = (single ? RegEx(quote) & !Exp::EscSingleQuote() : RegEx(quote));
    342   params.end = &end;
    343   params.eatEnd = true;
    344   params.escape = (single ? '\'' : '\\');
    345   params.indent = 0;
    346   params.fold = FOLD_FLOW;
    347   params.eatLeadingWhitespace = true;
    348   params.trimTrailingSpaces = false;
    349   params.chomp = CLIP;
    350   params.onDocIndicator = THROW;
    351 
    352   // insert a potential simple key
    353   InsertPotentialSimpleKey();
    354 
    355   Mark mark = INPUT.mark();
    356 
    357   // now eat that opening quote
    358   INPUT.get();
    359 
    360   // and scan
    361   scalar = ScanScalar(INPUT, params);
    362   m_simpleKeyAllowed = false;
    363   m_canBeJSONFlow = true;
    364 
    365   Token token(Token::NON_PLAIN_SCALAR, mark);
    366   token.value = scalar;
    367   m_tokens.push(token);
    368 }
    369 
    370 // BlockScalarToken
    371 // . These need a little extra processing beforehand.
    372 // . We need to scan the line where the indicator is (this doesn't count as part
    373 // of the scalar),
    374 //   and then we need to figure out what level of indentation we'll be using.
    375 void Scanner::ScanBlockScalar() {
    376   std::string scalar;
    377 
    378   ScanScalarParams params;
    379   params.indent = 1;
    380   params.detectIndent = true;
    381 
    382   // eat block indicator ('|' or '>')
    383   Mark mark = INPUT.mark();
    384   char indicator = INPUT.get();
    385   params.fold = (indicator == Keys::FoldedScalar ? FOLD_BLOCK : DONT_FOLD);
    386 
    387   // eat chomping/indentation indicators
    388   params.chomp = CLIP;
    389   int n = Exp::Chomp().Match(INPUT);
    390   for (int i = 0; i < n; i++) {
    391     char ch = INPUT.get();
    392     if (ch == '+')
    393       params.chomp = KEEP;
    394     else if (ch == '-')
    395       params.chomp = STRIP;
    396     else if (Exp::Digit().Matches(ch)) {
    397       if (ch == '0')
    398         throw ParserException(INPUT.mark(), ErrorMsg::ZERO_INDENT_IN_BLOCK);
    399 
    400       params.indent = ch - '0';
    401       params.detectIndent = false;
    402     }
    403   }
    404 
    405   // now eat whitespace
    406   while (Exp::Blank().Matches(INPUT))
    407     INPUT.eat(1);
    408 
    409   // and comments to the end of the line
    410   if (Exp::Comment().Matches(INPUT))
    411     while (INPUT && !Exp::Break().Matches(INPUT))
    412       INPUT.eat(1);
    413 
    414   // if it's not a line break, then we ran into a bad character inline
    415   if (INPUT && !Exp::Break().Matches(INPUT))
    416     throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_BLOCK);
    417 
    418   // set the initial indentation
    419   if (GetTopIndent() >= 0)
    420     params.indent += GetTopIndent();
    421 
    422   params.eatLeadingWhitespace = false;
    423   params.trimTrailingSpaces = false;
    424   params.onTabInIndentation = THROW;
    425 
    426   scalar = ScanScalar(INPUT, params);
    427 
    428   // simple keys always ok after block scalars (since we're gonna start a new
    429   // line anyways)
    430   m_simpleKeyAllowed = true;
    431   m_canBeJSONFlow = false;
    432 
    433   Token token(Token::NON_PLAIN_SCALAR, mark);
    434   token.value = scalar;
    435   m_tokens.push(token);
    436 }
    437 }  // namespace YAML