scantoken.cpp (11451B)
1 #include <sstream> 2 3 #include "exp.h" 4 #include "regex_yaml.h" 5 #include "regeximpl.h" 6 #include "scanner.h" 7 #include "scanscalar.h" 8 #include "scantag.h" // IWYU pragma: keep 9 #include "tag.h" // IWYU pragma: keep 10 #include "token.h" 11 #include "yaml-cpp/exceptions.h" // IWYU pragma: keep 12 #include "yaml-cpp/mark.h" 13 14 namespace YAML { 15 /////////////////////////////////////////////////////////////////////// 16 // Specialization for scanning specific tokens 17 18 // Directive 19 // . Note: no semantic checking is done here (that's for the parser to do) 20 void Scanner::ScanDirective() { 21 std::string name; 22 std::vector<std::string> params; 23 24 // pop indents and simple keys 25 PopAllIndents(); 26 PopAllSimpleKeys(); 27 28 m_simpleKeyAllowed = false; 29 m_canBeJSONFlow = false; 30 31 // store pos and eat indicator 32 Token token(Token::DIRECTIVE, INPUT.mark()); 33 INPUT.eat(1); 34 35 // read name 36 while (INPUT && !Exp::BlankOrBreak().Matches(INPUT)) 37 token.value += INPUT.get(); 38 39 // read parameters 40 while (true) { 41 // first get rid of whitespace 42 while (Exp::Blank().Matches(INPUT)) 43 INPUT.eat(1); 44 45 // break on newline or comment 46 if (!INPUT || Exp::Break().Matches(INPUT) || Exp::Comment().Matches(INPUT)) 47 break; 48 49 // now read parameter 50 std::string param; 51 while (INPUT && !Exp::BlankOrBreak().Matches(INPUT)) 52 param += INPUT.get(); 53 54 token.params.push_back(param); 55 } 56 57 m_tokens.push(token); 58 } 59 60 // DocStart 61 void Scanner::ScanDocStart() { 62 PopAllIndents(); 63 PopAllSimpleKeys(); 64 m_simpleKeyAllowed = false; 65 m_canBeJSONFlow = false; 66 67 // eat 68 Mark mark = INPUT.mark(); 69 INPUT.eat(3); 70 m_tokens.push(Token(Token::DOC_START, mark)); 71 } 72 73 // DocEnd 74 void Scanner::ScanDocEnd() { 75 PopAllIndents(); 76 PopAllSimpleKeys(); 77 m_simpleKeyAllowed = false; 78 m_canBeJSONFlow = false; 79 80 // eat 81 Mark mark = INPUT.mark(); 82 INPUT.eat(3); 83 m_tokens.push(Token(Token::DOC_END, mark)); 84 } 85 86 // FlowStart 87 void Scanner::ScanFlowStart() { 88 // flows can be simple keys 89 InsertPotentialSimpleKey(); 90 m_simpleKeyAllowed = true; 91 m_canBeJSONFlow = false; 92 93 // eat 94 Mark mark = INPUT.mark(); 95 char ch = INPUT.get(); 96 FLOW_MARKER flowType = (ch == Keys::FlowSeqStart ? FLOW_SEQ : FLOW_MAP); 97 m_flows.push(flowType); 98 Token::TYPE type = 99 (flowType == FLOW_SEQ ? Token::FLOW_SEQ_START : Token::FLOW_MAP_START); 100 m_tokens.push(Token(type, mark)); 101 } 102 103 // FlowEnd 104 void Scanner::ScanFlowEnd() { 105 if (InBlockContext()) 106 throw ParserException(INPUT.mark(), ErrorMsg::FLOW_END); 107 108 // we might have a solo entry in the flow context 109 if (InFlowContext()) { 110 if (m_flows.top() == FLOW_MAP && VerifySimpleKey()) 111 m_tokens.push(Token(Token::VALUE, INPUT.mark())); 112 else if (m_flows.top() == FLOW_SEQ) 113 InvalidateSimpleKey(); 114 } 115 116 m_simpleKeyAllowed = false; 117 m_canBeJSONFlow = true; 118 119 // eat 120 Mark mark = INPUT.mark(); 121 char ch = INPUT.get(); 122 123 // check that it matches the start 124 FLOW_MARKER flowType = (ch == Keys::FlowSeqEnd ? FLOW_SEQ : FLOW_MAP); 125 if (m_flows.top() != flowType) 126 throw ParserException(mark, ErrorMsg::FLOW_END); 127 m_flows.pop(); 128 129 Token::TYPE type = (flowType ? Token::FLOW_SEQ_END : Token::FLOW_MAP_END); 130 m_tokens.push(Token(type, mark)); 131 } 132 133 // FlowEntry 134 void Scanner::ScanFlowEntry() { 135 // we might have a solo entry in the flow context 136 if (InFlowContext()) { 137 if (m_flows.top() == FLOW_MAP && VerifySimpleKey()) 138 m_tokens.push(Token(Token::VALUE, INPUT.mark())); 139 else if (m_flows.top() == FLOW_SEQ) 140 InvalidateSimpleKey(); 141 } 142 143 m_simpleKeyAllowed = true; 144 m_canBeJSONFlow = false; 145 146 // eat 147 Mark mark = INPUT.mark(); 148 INPUT.eat(1); 149 m_tokens.push(Token(Token::FLOW_ENTRY, mark)); 150 } 151 152 // BlockEntry 153 void Scanner::ScanBlockEntry() { 154 // we better be in the block context! 155 if (InFlowContext()) 156 throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY); 157 158 // can we put it here? 159 if (!m_simpleKeyAllowed) 160 throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY); 161 162 PushIndentTo(INPUT.column(), IndentMarker::SEQ); 163 m_simpleKeyAllowed = true; 164 m_canBeJSONFlow = false; 165 166 // eat 167 Mark mark = INPUT.mark(); 168 INPUT.eat(1); 169 m_tokens.push(Token(Token::BLOCK_ENTRY, mark)); 170 } 171 172 // Key 173 void Scanner::ScanKey() { 174 // handle keys differently in the block context (and manage indents) 175 if (InBlockContext()) { 176 if (!m_simpleKeyAllowed) 177 throw ParserException(INPUT.mark(), ErrorMsg::MAP_KEY); 178 179 PushIndentTo(INPUT.column(), IndentMarker::MAP); 180 } 181 182 // can only put a simple key here if we're in block context 183 m_simpleKeyAllowed = InBlockContext(); 184 185 // eat 186 Mark mark = INPUT.mark(); 187 INPUT.eat(1); 188 m_tokens.push(Token(Token::KEY, mark)); 189 } 190 191 // Value 192 void Scanner::ScanValue() { 193 // and check that simple key 194 bool isSimpleKey = VerifySimpleKey(); 195 m_canBeJSONFlow = false; 196 197 if (isSimpleKey) { 198 // can't follow a simple key with another simple key (dunno why, though - it 199 // seems fine) 200 m_simpleKeyAllowed = false; 201 } else { 202 // handle values differently in the block context (and manage indents) 203 if (InBlockContext()) { 204 if (!m_simpleKeyAllowed) 205 throw ParserException(INPUT.mark(), ErrorMsg::MAP_VALUE); 206 207 PushIndentTo(INPUT.column(), IndentMarker::MAP); 208 } 209 210 // can only put a simple key here if we're in block context 211 m_simpleKeyAllowed = InBlockContext(); 212 } 213 214 // eat 215 Mark mark = INPUT.mark(); 216 INPUT.eat(1); 217 m_tokens.push(Token(Token::VALUE, mark)); 218 } 219 220 // AnchorOrAlias 221 void Scanner::ScanAnchorOrAlias() { 222 bool alias; 223 std::string name; 224 225 // insert a potential simple key 226 InsertPotentialSimpleKey(); 227 m_simpleKeyAllowed = false; 228 m_canBeJSONFlow = false; 229 230 // eat the indicator 231 Mark mark = INPUT.mark(); 232 char indicator = INPUT.get(); 233 alias = (indicator == Keys::Alias); 234 235 // now eat the content 236 while (INPUT && Exp::Anchor().Matches(INPUT)) 237 name += INPUT.get(); 238 239 // we need to have read SOMETHING! 240 if (name.empty()) 241 throw ParserException(INPUT.mark(), alias ? ErrorMsg::ALIAS_NOT_FOUND 242 : ErrorMsg::ANCHOR_NOT_FOUND); 243 244 // and needs to end correctly 245 if (INPUT && !Exp::AnchorEnd().Matches(INPUT)) 246 throw ParserException(INPUT.mark(), alias ? ErrorMsg::CHAR_IN_ALIAS 247 : ErrorMsg::CHAR_IN_ANCHOR); 248 249 // and we're done 250 Token token(alias ? Token::ALIAS : Token::ANCHOR, mark); 251 token.value = name; 252 m_tokens.push(token); 253 } 254 255 // Tag 256 void Scanner::ScanTag() { 257 // insert a potential simple key 258 InsertPotentialSimpleKey(); 259 m_simpleKeyAllowed = false; 260 m_canBeJSONFlow = false; 261 262 Token token(Token::TAG, INPUT.mark()); 263 264 // eat the indicator 265 INPUT.get(); 266 267 if (INPUT && INPUT.peek() == Keys::VerbatimTagStart) { 268 std::string tag = ScanVerbatimTag(INPUT); 269 270 token.value = tag; 271 token.data = Tag::VERBATIM; 272 } else { 273 bool canBeHandle; 274 token.value = ScanTagHandle(INPUT, canBeHandle); 275 if (!canBeHandle && token.value.empty()) 276 token.data = Tag::NON_SPECIFIC; 277 else if (token.value.empty()) 278 token.data = Tag::SECONDARY_HANDLE; 279 else 280 token.data = Tag::PRIMARY_HANDLE; 281 282 // is there a suffix? 283 if (canBeHandle && INPUT.peek() == Keys::Tag) { 284 // eat the indicator 285 INPUT.get(); 286 token.params.push_back(ScanTagSuffix(INPUT)); 287 token.data = Tag::NAMED_HANDLE; 288 } 289 } 290 291 m_tokens.push(token); 292 } 293 294 // PlainScalar 295 void Scanner::ScanPlainScalar() { 296 std::string scalar; 297 298 // set up the scanning parameters 299 ScanScalarParams params; 300 params.end = 301 (InFlowContext() ? &Exp::ScanScalarEndInFlow() : &Exp::ScanScalarEnd()); 302 params.eatEnd = false; 303 params.indent = (InFlowContext() ? 0 : GetTopIndent() + 1); 304 params.fold = FOLD_FLOW; 305 params.eatLeadingWhitespace = true; 306 params.trimTrailingSpaces = true; 307 params.chomp = STRIP; 308 params.onDocIndicator = BREAK; 309 params.onTabInIndentation = THROW; 310 311 // insert a potential simple key 312 InsertPotentialSimpleKey(); 313 314 Mark mark = INPUT.mark(); 315 scalar = ScanScalar(INPUT, params); 316 317 // can have a simple key only if we ended the scalar by starting a new line 318 m_simpleKeyAllowed = params.leadingSpaces; 319 m_canBeJSONFlow = false; 320 321 // finally, check and see if we ended on an illegal character 322 // if(Exp::IllegalCharInScalar.Matches(INPUT)) 323 // throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_SCALAR); 324 325 Token token(Token::PLAIN_SCALAR, mark); 326 token.value = scalar; 327 m_tokens.push(token); 328 } 329 330 // QuotedScalar 331 void Scanner::ScanQuotedScalar() { 332 std::string scalar; 333 334 // peek at single or double quote (don't eat because we need to preserve (for 335 // the time being) the input position) 336 char quote = INPUT.peek(); 337 bool single = (quote == '\''); 338 339 // setup the scanning parameters 340 ScanScalarParams params; 341 RegEx end = (single ? RegEx(quote) & !Exp::EscSingleQuote() : RegEx(quote)); 342 params.end = &end; 343 params.eatEnd = true; 344 params.escape = (single ? '\'' : '\\'); 345 params.indent = 0; 346 params.fold = FOLD_FLOW; 347 params.eatLeadingWhitespace = true; 348 params.trimTrailingSpaces = false; 349 params.chomp = CLIP; 350 params.onDocIndicator = THROW; 351 352 // insert a potential simple key 353 InsertPotentialSimpleKey(); 354 355 Mark mark = INPUT.mark(); 356 357 // now eat that opening quote 358 INPUT.get(); 359 360 // and scan 361 scalar = ScanScalar(INPUT, params); 362 m_simpleKeyAllowed = false; 363 m_canBeJSONFlow = true; 364 365 Token token(Token::NON_PLAIN_SCALAR, mark); 366 token.value = scalar; 367 m_tokens.push(token); 368 } 369 370 // BlockScalarToken 371 // . These need a little extra processing beforehand. 372 // . We need to scan the line where the indicator is (this doesn't count as part 373 // of the scalar), 374 // and then we need to figure out what level of indentation we'll be using. 375 void Scanner::ScanBlockScalar() { 376 std::string scalar; 377 378 ScanScalarParams params; 379 params.indent = 1; 380 params.detectIndent = true; 381 382 // eat block indicator ('|' or '>') 383 Mark mark = INPUT.mark(); 384 char indicator = INPUT.get(); 385 params.fold = (indicator == Keys::FoldedScalar ? FOLD_BLOCK : DONT_FOLD); 386 387 // eat chomping/indentation indicators 388 params.chomp = CLIP; 389 int n = Exp::Chomp().Match(INPUT); 390 for (int i = 0; i < n; i++) { 391 char ch = INPUT.get(); 392 if (ch == '+') 393 params.chomp = KEEP; 394 else if (ch == '-') 395 params.chomp = STRIP; 396 else if (Exp::Digit().Matches(ch)) { 397 if (ch == '0') 398 throw ParserException(INPUT.mark(), ErrorMsg::ZERO_INDENT_IN_BLOCK); 399 400 params.indent = ch - '0'; 401 params.detectIndent = false; 402 } 403 } 404 405 // now eat whitespace 406 while (Exp::Blank().Matches(INPUT)) 407 INPUT.eat(1); 408 409 // and comments to the end of the line 410 if (Exp::Comment().Matches(INPUT)) 411 while (INPUT && !Exp::Break().Matches(INPUT)) 412 INPUT.eat(1); 413 414 // if it's not a line break, then we ran into a bad character inline 415 if (INPUT && !Exp::Break().Matches(INPUT)) 416 throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_BLOCK); 417 418 // set the initial indentation 419 if (GetTopIndent() >= 0) 420 params.indent += GetTopIndent(); 421 422 params.eatLeadingWhitespace = false; 423 params.trimTrailingSpaces = false; 424 params.onTabInIndentation = THROW; 425 426 scalar = ScanScalar(INPUT, params); 427 428 // simple keys always ok after block scalars (since we're gonna start a new 429 // line anyways) 430 m_simpleKeyAllowed = true; 431 m_canBeJSONFlow = false; 432 433 Token token(Token::NON_PLAIN_SCALAR, mark); 434 token.value = scalar; 435 m_tokens.push(token); 436 } 437 } // namespace YAML