scanner.cpp (9007B)
1 #include <cassert> 2 #include <memory> 3 4 #include "exp.h" 5 #include "scanner.h" 6 #include "token.h" 7 #include "yaml-cpp/exceptions.h" // IWYU pragma: keep 8 9 namespace YAML { 10 Scanner::Scanner(std::istream& in) 11 : INPUT(in), 12 m_tokens{}, 13 m_startedStream(false), 14 m_endedStream(false), 15 m_simpleKeyAllowed(false), 16 m_canBeJSONFlow(false), 17 m_simpleKeys{}, 18 m_indents{}, 19 m_indentRefs{}, 20 m_flows{} {} 21 22 Scanner::~Scanner() = default; 23 24 bool Scanner::empty() { 25 EnsureTokensInQueue(); 26 return m_tokens.empty(); 27 } 28 29 void Scanner::pop() { 30 EnsureTokensInQueue(); 31 if (!m_tokens.empty()) 32 m_tokens.pop(); 33 } 34 35 Token& Scanner::peek() { 36 EnsureTokensInQueue(); 37 assert(!m_tokens.empty()); // should we be asserting here? I mean, we really 38 // just be checking 39 // if it's empty before peeking. 40 41 #if 0 42 static Token *pLast = 0; 43 if(pLast != &m_tokens.front()) 44 std::cerr << "peek: " << m_tokens.front() << "\n"; 45 pLast = &m_tokens.front(); 46 #endif 47 48 return m_tokens.front(); 49 } 50 51 Mark Scanner::mark() const { return INPUT.mark(); } 52 53 void Scanner::EnsureTokensInQueue() { 54 while (true) { 55 if (!m_tokens.empty()) { 56 Token& token = m_tokens.front(); 57 58 // if this guy's valid, then we're done 59 if (token.status == Token::VALID) { 60 return; 61 } 62 63 // here's where we clean up the impossible tokens 64 if (token.status == Token::INVALID) { 65 m_tokens.pop(); 66 continue; 67 } 68 69 // note: what's left are the unverified tokens 70 } 71 72 // no token? maybe we've actually finished 73 if (m_endedStream) { 74 return; 75 } 76 77 // no? then scan... 78 ScanNextToken(); 79 } 80 } 81 82 void Scanner::ScanNextToken() { 83 if (m_endedStream) { 84 return; 85 } 86 87 if (!m_startedStream) { 88 return StartStream(); 89 } 90 91 // get rid of whitespace, etc. (in between tokens it should be irrelevant) 92 ScanToNextToken(); 93 94 // maybe need to end some blocks 95 PopIndentToHere(); 96 97 // ***** 98 // And now branch based on the next few characters! 99 // ***** 100 101 // end of stream 102 if (!INPUT) { 103 return EndStream(); 104 } 105 106 if (INPUT.column() == 0 && INPUT.peek() == Keys::Directive) { 107 return ScanDirective(); 108 } 109 110 // document token 111 if (INPUT.column() == 0 && Exp::DocStart().Matches(INPUT)) { 112 return ScanDocStart(); 113 } 114 115 if (INPUT.column() == 0 && Exp::DocEnd().Matches(INPUT)) { 116 return ScanDocEnd(); 117 } 118 119 // flow start/end/entry 120 if (INPUT.peek() == Keys::FlowSeqStart || 121 INPUT.peek() == Keys::FlowMapStart) { 122 return ScanFlowStart(); 123 } 124 125 if (INPUT.peek() == Keys::FlowSeqEnd || INPUT.peek() == Keys::FlowMapEnd) { 126 return ScanFlowEnd(); 127 } 128 129 if (INPUT.peek() == Keys::FlowEntry) { 130 return ScanFlowEntry(); 131 } 132 133 // block/map stuff 134 if (Exp::BlockEntry().Matches(INPUT)) { 135 return ScanBlockEntry(); 136 } 137 138 if ((InBlockContext() ? Exp::Key() : Exp::KeyInFlow()).Matches(INPUT)) { 139 return ScanKey(); 140 } 141 142 if (GetValueRegex().Matches(INPUT)) { 143 return ScanValue(); 144 } 145 146 // alias/anchor 147 if (INPUT.peek() == Keys::Alias || INPUT.peek() == Keys::Anchor) { 148 return ScanAnchorOrAlias(); 149 } 150 151 // tag 152 if (INPUT.peek() == Keys::Tag) { 153 return ScanTag(); 154 } 155 156 // special scalars 157 if (InBlockContext() && (INPUT.peek() == Keys::LiteralScalar || 158 INPUT.peek() == Keys::FoldedScalar)) { 159 return ScanBlockScalar(); 160 } 161 162 if (INPUT.peek() == '\'' || INPUT.peek() == '\"') { 163 return ScanQuotedScalar(); 164 } 165 166 // plain scalars 167 if ((InBlockContext() ? Exp::PlainScalar() : Exp::PlainScalarInFlow()) 168 .Matches(INPUT)) { 169 return ScanPlainScalar(); 170 } 171 172 // don't know what it is! 173 throw ParserException(INPUT.mark(), ErrorMsg::UNKNOWN_TOKEN); 174 } 175 176 void Scanner::ScanToNextToken() { 177 while (true) { 178 // first eat whitespace 179 while (INPUT && IsWhitespaceToBeEaten(INPUT.peek())) { 180 if (InBlockContext() && Exp::Tab().Matches(INPUT)) { 181 m_simpleKeyAllowed = false; 182 } 183 INPUT.eat(1); 184 } 185 186 // then eat a comment 187 if (Exp::Comment().Matches(INPUT)) { 188 // eat until line break 189 while (INPUT && !Exp::Break().Matches(INPUT)) { 190 INPUT.eat(1); 191 } 192 } 193 194 // if it's NOT a line break, then we're done! 195 if (!Exp::Break().Matches(INPUT)) { 196 break; 197 } 198 199 // otherwise, let's eat the line break and keep going 200 int n = Exp::Break().Match(INPUT); 201 INPUT.eat(n); 202 203 // oh yeah, and let's get rid of that simple key 204 InvalidateSimpleKey(); 205 206 // new line - we may be able to accept a simple key now 207 if (InBlockContext()) { 208 m_simpleKeyAllowed = true; 209 } 210 } 211 } 212 213 /////////////////////////////////////////////////////////////////////// 214 // Misc. helpers 215 216 // IsWhitespaceToBeEaten 217 // . We can eat whitespace if it's a space or tab 218 // . Note: originally tabs in block context couldn't be eaten 219 // "where a simple key could be allowed 220 // (i.e., not at the beginning of a line, or following '-', '?', or 221 // ':')" 222 // I think this is wrong, since tabs can be non-content whitespace; it's just 223 // that they can't contribute to indentation, so once you've seen a tab in a 224 // line, you can't start a simple key 225 bool Scanner::IsWhitespaceToBeEaten(char ch) { 226 if (ch == ' ') { 227 return true; 228 } 229 230 if (ch == '\t') { 231 return true; 232 } 233 234 return false; 235 } 236 237 const RegEx& Scanner::GetValueRegex() const { 238 if (InBlockContext()) { 239 return Exp::Value(); 240 } 241 242 return m_canBeJSONFlow ? Exp::ValueInJSONFlow() : Exp::ValueInFlow(); 243 } 244 245 void Scanner::StartStream() { 246 m_startedStream = true; 247 m_simpleKeyAllowed = true; 248 std::unique_ptr<IndentMarker> pIndent( 249 new IndentMarker(-1, IndentMarker::NONE)); 250 m_indentRefs.push_back(std::move(pIndent)); 251 m_indents.push(&m_indentRefs.back()); 252 } 253 254 void Scanner::EndStream() { 255 // force newline 256 if (INPUT.column() > 0) { 257 INPUT.ResetColumn(); 258 } 259 260 PopAllIndents(); 261 PopAllSimpleKeys(); 262 263 m_simpleKeyAllowed = false; 264 m_endedStream = true; 265 } 266 267 Token* Scanner::PushToken(Token::TYPE type) { 268 m_tokens.push(Token(type, INPUT.mark())); 269 return &m_tokens.back(); 270 } 271 272 Token::TYPE Scanner::GetStartTokenFor(IndentMarker::INDENT_TYPE type) const { 273 switch (type) { 274 case IndentMarker::SEQ: 275 return Token::BLOCK_SEQ_START; 276 case IndentMarker::MAP: 277 return Token::BLOCK_MAP_START; 278 case IndentMarker::NONE: 279 assert(false); 280 break; 281 } 282 assert(false); 283 throw std::runtime_error("yaml-cpp: internal error, invalid indent type"); 284 } 285 286 Scanner::IndentMarker* Scanner::PushIndentTo(int column, 287 IndentMarker::INDENT_TYPE type) { 288 // are we in flow? 289 if (InFlowContext()) { 290 return nullptr; 291 } 292 293 std::unique_ptr<IndentMarker> pIndent(new IndentMarker(column, type)); 294 IndentMarker& indent = *pIndent; 295 const IndentMarker& lastIndent = *m_indents.top(); 296 297 // is this actually an indentation? 298 if (indent.column < lastIndent.column) { 299 return nullptr; 300 } 301 if (indent.column == lastIndent.column && 302 !(indent.type == IndentMarker::SEQ && 303 lastIndent.type == IndentMarker::MAP)) { 304 return nullptr; 305 } 306 307 // push a start token 308 indent.pStartToken = PushToken(GetStartTokenFor(type)); 309 310 // and then the indent 311 m_indents.push(&indent); 312 m_indentRefs.push_back(std::move(pIndent)); 313 return &m_indentRefs.back(); 314 } 315 316 void Scanner::PopIndentToHere() { 317 // are we in flow? 318 if (InFlowContext()) { 319 return; 320 } 321 322 // now pop away 323 while (!m_indents.empty()) { 324 const IndentMarker& indent = *m_indents.top(); 325 if (indent.column < INPUT.column()) { 326 break; 327 } 328 if (indent.column == INPUT.column() && 329 !(indent.type == IndentMarker::SEQ && 330 !Exp::BlockEntry().Matches(INPUT))) { 331 break; 332 } 333 334 PopIndent(); 335 } 336 337 while (!m_indents.empty() && 338 m_indents.top()->status == IndentMarker::INVALID) { 339 PopIndent(); 340 } 341 } 342 343 void Scanner::PopAllIndents() { 344 // are we in flow? 345 if (InFlowContext()) { 346 return; 347 } 348 349 // now pop away 350 while (!m_indents.empty()) { 351 const IndentMarker& indent = *m_indents.top(); 352 if (indent.type == IndentMarker::NONE) { 353 break; 354 } 355 356 PopIndent(); 357 } 358 } 359 360 void Scanner::PopIndent() { 361 const IndentMarker& indent = *m_indents.top(); 362 m_indents.pop(); 363 364 if (indent.status != IndentMarker::VALID) { 365 InvalidateSimpleKey(); 366 return; 367 } 368 369 if (indent.type == IndentMarker::SEQ) { 370 m_tokens.push(Token(Token::BLOCK_SEQ_END, INPUT.mark())); 371 } else if (indent.type == IndentMarker::MAP) { 372 m_tokens.push(Token(Token::BLOCK_MAP_END, INPUT.mark())); 373 } 374 } 375 376 int Scanner::GetTopIndent() const { 377 if (m_indents.empty()) { 378 return 0; 379 } 380 return m_indents.top()->column; 381 } 382 383 void Scanner::ThrowParserException(const std::string& msg) const { 384 Mark mark = Mark::null_mark(); 385 if (!m_tokens.empty()) { 386 const Token& token = m_tokens.front(); 387 mark = token.mark; 388 } 389 throw ParserException(mark, msg); 390 } 391 } // namespace YAML