yaml-cpp

FORK: A YAML parser and emitter in C++
git clone https://git.neptards.moe/neptards/yaml-cpp.git
Log | Files | Refs | README | LICENSE

scanscalar.cpp (6990B)


      1 #include "scanscalar.h"
      2 
      3 #include <algorithm>
      4 
      5 #include "exp.h"
      6 #include "regeximpl.h"
      7 #include "stream.h"
      8 #include "yaml-cpp/exceptions.h"  // IWYU pragma: keep
      9 
     10 namespace YAML {
     11 // ScanScalar
     12 // . This is where the scalar magic happens.
     13 //
     14 // . We do the scanning in three phases:
     15 //   1. Scan until newline
     16 //   2. Eat newline
     17 //   3. Scan leading blanks.
     18 //
     19 // . Depending on the parameters given, we store or stop
     20 //   and different places in the above flow.
     21 std::string ScanScalar(Stream& INPUT, ScanScalarParams& params) {
     22   bool foundNonEmptyLine = false;
     23   bool pastOpeningBreak = (params.fold == FOLD_FLOW);
     24   bool emptyLine = false, moreIndented = false;
     25   int foldedNewlineCount = 0;
     26   bool foldedNewlineStartedMoreIndented = false;
     27   std::size_t lastEscapedChar = std::string::npos;
     28   std::string scalar;
     29   params.leadingSpaces = false;
     30 
     31   if (!params.end) {
     32     params.end = &Exp::Empty();
     33   }
     34 
     35   while (INPUT) {
     36     // ********************************
     37     // Phase #1: scan until line ending
     38 
     39     std::size_t lastNonWhitespaceChar = scalar.size();
     40     bool escapedNewline = false;
     41     while (!params.end->Matches(INPUT) && !Exp::Break().Matches(INPUT)) {
     42       if (!INPUT) {
     43         break;
     44       }
     45 
     46       // document indicator?
     47       if (INPUT.column() == 0 && Exp::DocIndicator().Matches(INPUT)) {
     48         if (params.onDocIndicator == BREAK) {
     49           break;
     50         }
     51         if (params.onDocIndicator == THROW) {
     52           throw ParserException(INPUT.mark(), ErrorMsg::DOC_IN_SCALAR);
     53         }
     54       }
     55 
     56       foundNonEmptyLine = true;
     57       pastOpeningBreak = true;
     58 
     59       // escaped newline? (only if we're escaping on slash)
     60       if (params.escape == '\\' && Exp::EscBreak().Matches(INPUT)) {
     61         // eat escape character and get out (but preserve trailing whitespace!)
     62         INPUT.get();
     63         lastNonWhitespaceChar = scalar.size();
     64         lastEscapedChar = scalar.size();
     65         escapedNewline = true;
     66         break;
     67       }
     68 
     69       // escape this?
     70       if (INPUT.peek() == params.escape) {
     71         scalar += Exp::Escape(INPUT);
     72         lastNonWhitespaceChar = scalar.size();
     73         lastEscapedChar = scalar.size();
     74         continue;
     75       }
     76 
     77       // otherwise, just add the damn character
     78       char ch = INPUT.get();
     79       scalar += ch;
     80       if (ch != ' ' && ch != '\t') {
     81         lastNonWhitespaceChar = scalar.size();
     82       }
     83     }
     84 
     85     // eof? if we're looking to eat something, then we throw
     86     if (!INPUT) {
     87       if (params.eatEnd) {
     88         throw ParserException(INPUT.mark(), ErrorMsg::EOF_IN_SCALAR);
     89       }
     90       break;
     91     }
     92 
     93     // doc indicator?
     94     if (params.onDocIndicator == BREAK && INPUT.column() == 0 &&
     95         Exp::DocIndicator().Matches(INPUT)) {
     96       break;
     97     }
     98 
     99     // are we done via character match?
    100     int n = params.end->Match(INPUT);
    101     if (n >= 0) {
    102       if (params.eatEnd) {
    103         INPUT.eat(n);
    104       }
    105       break;
    106     }
    107 
    108     // do we remove trailing whitespace?
    109     if (params.fold == FOLD_FLOW)
    110       scalar.erase(lastNonWhitespaceChar);
    111 
    112     // ********************************
    113     // Phase #2: eat line ending
    114     n = Exp::Break().Match(INPUT);
    115     INPUT.eat(n);
    116 
    117     // ********************************
    118     // Phase #3: scan initial spaces
    119 
    120     // first the required indentation
    121     while (INPUT.peek() == ' ' &&
    122            (INPUT.column() < params.indent ||
    123             (params.detectIndent && !foundNonEmptyLine)) &&
    124            !params.end->Matches(INPUT)) {
    125       INPUT.eat(1);
    126     }
    127 
    128     // update indent if we're auto-detecting
    129     if (params.detectIndent && !foundNonEmptyLine) {
    130       params.indent = std::max(params.indent, INPUT.column());
    131     }
    132 
    133     // and then the rest of the whitespace
    134     while (Exp::Blank().Matches(INPUT)) {
    135       // we check for tabs that masquerade as indentation
    136       if (INPUT.peek() == '\t' && INPUT.column() < params.indent &&
    137           params.onTabInIndentation == THROW) {
    138         throw ParserException(INPUT.mark(), ErrorMsg::TAB_IN_INDENTATION);
    139       }
    140 
    141       if (!params.eatLeadingWhitespace) {
    142         break;
    143       }
    144 
    145       if (params.end->Matches(INPUT)) {
    146         break;
    147       }
    148 
    149       INPUT.eat(1);
    150     }
    151 
    152     // was this an empty line?
    153     bool nextEmptyLine = Exp::Break().Matches(INPUT);
    154     bool nextMoreIndented = Exp::Blank().Matches(INPUT);
    155     if (params.fold == FOLD_BLOCK && foldedNewlineCount == 0 && nextEmptyLine)
    156       foldedNewlineStartedMoreIndented = moreIndented;
    157 
    158     // for block scalars, we always start with a newline, so we should ignore it
    159     // (not fold or keep)
    160     if (pastOpeningBreak) {
    161       switch (params.fold) {
    162         case DONT_FOLD:
    163           scalar += "\n";
    164           break;
    165         case FOLD_BLOCK:
    166           if (!emptyLine && !nextEmptyLine && !moreIndented &&
    167               !nextMoreIndented && INPUT.column() >= params.indent) {
    168             scalar += " ";
    169           } else if (nextEmptyLine) {
    170             foldedNewlineCount++;
    171           } else {
    172             scalar += "\n";
    173           }
    174 
    175           if (!nextEmptyLine && foldedNewlineCount > 0) {
    176             scalar += std::string(foldedNewlineCount - 1, '\n');
    177             if (foldedNewlineStartedMoreIndented ||
    178                 nextMoreIndented | !foundNonEmptyLine) {
    179               scalar += "\n";
    180             }
    181             foldedNewlineCount = 0;
    182           }
    183           break;
    184         case FOLD_FLOW:
    185           if (nextEmptyLine) {
    186             scalar += "\n";
    187           } else if (!emptyLine && !escapedNewline) {
    188             scalar += " ";
    189           }
    190           break;
    191       }
    192     }
    193 
    194     emptyLine = nextEmptyLine;
    195     moreIndented = nextMoreIndented;
    196     pastOpeningBreak = true;
    197 
    198     // are we done via indentation?
    199     if (!emptyLine && INPUT.column() < params.indent) {
    200       params.leadingSpaces = true;
    201       break;
    202     }
    203   }
    204 
    205   // post-processing
    206   if (params.trimTrailingSpaces) {
    207     std::size_t pos = scalar.find_last_not_of(" \t");
    208     if (lastEscapedChar != std::string::npos) {
    209       if (pos < lastEscapedChar || pos == std::string::npos) {
    210         pos = lastEscapedChar;
    211       }
    212     }
    213     if (pos < scalar.size()) {
    214       scalar.erase(pos + 1);
    215     }
    216   }
    217 
    218   switch (params.chomp) {
    219     case CLIP: {
    220       std::size_t pos = scalar.find_last_not_of('\n');
    221       if (lastEscapedChar != std::string::npos) {
    222         if (pos < lastEscapedChar || pos == std::string::npos) {
    223           pos = lastEscapedChar;
    224         }
    225       }
    226       if (pos == std::string::npos) {
    227         scalar.erase();
    228       } else if (pos + 1 < scalar.size()) {
    229         scalar.erase(pos + 2);
    230       }
    231     } break;
    232     case STRIP: {
    233       std::size_t pos = scalar.find_last_not_of('\n');
    234       if (lastEscapedChar != std::string::npos) {
    235         if (pos < lastEscapedChar || pos == std::string::npos) {
    236           pos = lastEscapedChar;
    237         }
    238       }
    239       if (pos == std::string::npos) {
    240         scalar.erase();
    241       } else if (pos < scalar.size()) {
    242         scalar.erase(pos + 1);
    243       }
    244     } break;
    245     default:
    246       break;
    247   }
    248 
    249   return scalar;
    250 }
    251 }  // namespace YAML