scanscalar.cpp (6990B)
1 #include "scanscalar.h" 2 3 #include <algorithm> 4 5 #include "exp.h" 6 #include "regeximpl.h" 7 #include "stream.h" 8 #include "yaml-cpp/exceptions.h" // IWYU pragma: keep 9 10 namespace YAML { 11 // ScanScalar 12 // . This is where the scalar magic happens. 13 // 14 // . We do the scanning in three phases: 15 // 1. Scan until newline 16 // 2. Eat newline 17 // 3. Scan leading blanks. 18 // 19 // . Depending on the parameters given, we store or stop 20 // and different places in the above flow. 21 std::string ScanScalar(Stream& INPUT, ScanScalarParams& params) { 22 bool foundNonEmptyLine = false; 23 bool pastOpeningBreak = (params.fold == FOLD_FLOW); 24 bool emptyLine = false, moreIndented = false; 25 int foldedNewlineCount = 0; 26 bool foldedNewlineStartedMoreIndented = false; 27 std::size_t lastEscapedChar = std::string::npos; 28 std::string scalar; 29 params.leadingSpaces = false; 30 31 if (!params.end) { 32 params.end = &Exp::Empty(); 33 } 34 35 while (INPUT) { 36 // ******************************** 37 // Phase #1: scan until line ending 38 39 std::size_t lastNonWhitespaceChar = scalar.size(); 40 bool escapedNewline = false; 41 while (!params.end->Matches(INPUT) && !Exp::Break().Matches(INPUT)) { 42 if (!INPUT) { 43 break; 44 } 45 46 // document indicator? 47 if (INPUT.column() == 0 && Exp::DocIndicator().Matches(INPUT)) { 48 if (params.onDocIndicator == BREAK) { 49 break; 50 } 51 if (params.onDocIndicator == THROW) { 52 throw ParserException(INPUT.mark(), ErrorMsg::DOC_IN_SCALAR); 53 } 54 } 55 56 foundNonEmptyLine = true; 57 pastOpeningBreak = true; 58 59 // escaped newline? (only if we're escaping on slash) 60 if (params.escape == '\\' && Exp::EscBreak().Matches(INPUT)) { 61 // eat escape character and get out (but preserve trailing whitespace!) 62 INPUT.get(); 63 lastNonWhitespaceChar = scalar.size(); 64 lastEscapedChar = scalar.size(); 65 escapedNewline = true; 66 break; 67 } 68 69 // escape this? 70 if (INPUT.peek() == params.escape) { 71 scalar += Exp::Escape(INPUT); 72 lastNonWhitespaceChar = scalar.size(); 73 lastEscapedChar = scalar.size(); 74 continue; 75 } 76 77 // otherwise, just add the damn character 78 char ch = INPUT.get(); 79 scalar += ch; 80 if (ch != ' ' && ch != '\t') { 81 lastNonWhitespaceChar = scalar.size(); 82 } 83 } 84 85 // eof? if we're looking to eat something, then we throw 86 if (!INPUT) { 87 if (params.eatEnd) { 88 throw ParserException(INPUT.mark(), ErrorMsg::EOF_IN_SCALAR); 89 } 90 break; 91 } 92 93 // doc indicator? 94 if (params.onDocIndicator == BREAK && INPUT.column() == 0 && 95 Exp::DocIndicator().Matches(INPUT)) { 96 break; 97 } 98 99 // are we done via character match? 100 int n = params.end->Match(INPUT); 101 if (n >= 0) { 102 if (params.eatEnd) { 103 INPUT.eat(n); 104 } 105 break; 106 } 107 108 // do we remove trailing whitespace? 109 if (params.fold == FOLD_FLOW) 110 scalar.erase(lastNonWhitespaceChar); 111 112 // ******************************** 113 // Phase #2: eat line ending 114 n = Exp::Break().Match(INPUT); 115 INPUT.eat(n); 116 117 // ******************************** 118 // Phase #3: scan initial spaces 119 120 // first the required indentation 121 while (INPUT.peek() == ' ' && 122 (INPUT.column() < params.indent || 123 (params.detectIndent && !foundNonEmptyLine)) && 124 !params.end->Matches(INPUT)) { 125 INPUT.eat(1); 126 } 127 128 // update indent if we're auto-detecting 129 if (params.detectIndent && !foundNonEmptyLine) { 130 params.indent = std::max(params.indent, INPUT.column()); 131 } 132 133 // and then the rest of the whitespace 134 while (Exp::Blank().Matches(INPUT)) { 135 // we check for tabs that masquerade as indentation 136 if (INPUT.peek() == '\t' && INPUT.column() < params.indent && 137 params.onTabInIndentation == THROW) { 138 throw ParserException(INPUT.mark(), ErrorMsg::TAB_IN_INDENTATION); 139 } 140 141 if (!params.eatLeadingWhitespace) { 142 break; 143 } 144 145 if (params.end->Matches(INPUT)) { 146 break; 147 } 148 149 INPUT.eat(1); 150 } 151 152 // was this an empty line? 153 bool nextEmptyLine = Exp::Break().Matches(INPUT); 154 bool nextMoreIndented = Exp::Blank().Matches(INPUT); 155 if (params.fold == FOLD_BLOCK && foldedNewlineCount == 0 && nextEmptyLine) 156 foldedNewlineStartedMoreIndented = moreIndented; 157 158 // for block scalars, we always start with a newline, so we should ignore it 159 // (not fold or keep) 160 if (pastOpeningBreak) { 161 switch (params.fold) { 162 case DONT_FOLD: 163 scalar += "\n"; 164 break; 165 case FOLD_BLOCK: 166 if (!emptyLine && !nextEmptyLine && !moreIndented && 167 !nextMoreIndented && INPUT.column() >= params.indent) { 168 scalar += " "; 169 } else if (nextEmptyLine) { 170 foldedNewlineCount++; 171 } else { 172 scalar += "\n"; 173 } 174 175 if (!nextEmptyLine && foldedNewlineCount > 0) { 176 scalar += std::string(foldedNewlineCount - 1, '\n'); 177 if (foldedNewlineStartedMoreIndented || 178 nextMoreIndented | !foundNonEmptyLine) { 179 scalar += "\n"; 180 } 181 foldedNewlineCount = 0; 182 } 183 break; 184 case FOLD_FLOW: 185 if (nextEmptyLine) { 186 scalar += "\n"; 187 } else if (!emptyLine && !escapedNewline) { 188 scalar += " "; 189 } 190 break; 191 } 192 } 193 194 emptyLine = nextEmptyLine; 195 moreIndented = nextMoreIndented; 196 pastOpeningBreak = true; 197 198 // are we done via indentation? 199 if (!emptyLine && INPUT.column() < params.indent) { 200 params.leadingSpaces = true; 201 break; 202 } 203 } 204 205 // post-processing 206 if (params.trimTrailingSpaces) { 207 std::size_t pos = scalar.find_last_not_of(" \t"); 208 if (lastEscapedChar != std::string::npos) { 209 if (pos < lastEscapedChar || pos == std::string::npos) { 210 pos = lastEscapedChar; 211 } 212 } 213 if (pos < scalar.size()) { 214 scalar.erase(pos + 1); 215 } 216 } 217 218 switch (params.chomp) { 219 case CLIP: { 220 std::size_t pos = scalar.find_last_not_of('\n'); 221 if (lastEscapedChar != std::string::npos) { 222 if (pos < lastEscapedChar || pos == std::string::npos) { 223 pos = lastEscapedChar; 224 } 225 } 226 if (pos == std::string::npos) { 227 scalar.erase(); 228 } else if (pos + 1 < scalar.size()) { 229 scalar.erase(pos + 2); 230 } 231 } break; 232 case STRIP: { 233 std::size_t pos = scalar.find_last_not_of('\n'); 234 if (lastEscapedChar != std::string::npos) { 235 if (pos < lastEscapedChar || pos == std::string::npos) { 236 pos = lastEscapedChar; 237 } 238 } 239 if (pos == std::string::npos) { 240 scalar.erase(); 241 } else if (pos < scalar.size()) { 242 scalar.erase(pos + 1); 243 } 244 } break; 245 default: 246 break; 247 } 248 249 return scalar; 250 } 251 } // namespace YAML