yaml-cpp

FORK: A YAML parser and emitter in C++
git clone https://git.neptards.moe/neptards/yaml-cpp.git
Log | Files | Refs | README | LICENSE

encoding_test.cpp (4591B)


      1 #include <sstream>
      2 
      3 #include "handler_test.h"
      4 #include "yaml-cpp/yaml.h"  // IWYU pragma: keep
      5 
      6 #include "gtest/gtest.h"
      7 
      8 using ::testing::_;
      9 using ::testing::InSequence;
     10 
     11 namespace YAML {
     12 namespace {
     13 typedef void (*EncodingFn)(std::ostream&, int);
     14 
     15 inline char Byte(int ch) {
     16   return static_cast<char>(
     17       static_cast<unsigned char>(static_cast<unsigned int>(ch)));
     18 }
     19 
     20 void EncodeToUtf8(std::ostream& stream, int ch) {
     21   if (ch <= 0x7F) {
     22     stream << Byte(ch);
     23   } else if (ch <= 0x7FF) {
     24     stream << Byte(0xC0 | (ch >> 6));
     25     stream << Byte(0x80 | (ch & 0x3F));
     26   } else if (ch <= 0xFFFF) {
     27     stream << Byte(0xE0 | (ch >> 12));
     28     stream << Byte(0x80 | ((ch >> 6) & 0x3F));
     29     stream << Byte(0x80 | (ch & 0x3F));
     30   } else if (ch <= 0x1FFFFF) {
     31     stream << Byte(0xF0 | (ch >> 18));
     32     stream << Byte(0x80 | ((ch >> 12) & 0x3F));
     33     stream << Byte(0x80 | ((ch >> 6) & 0x3F));
     34     stream << Byte(0x80 | (ch & 0x3F));
     35   }
     36 }
     37 
     38 bool SplitUtf16HighChar(std::ostream& stream, EncodingFn encoding, int ch) {
     39   int biasedValue = ch - 0x10000;
     40   if (biasedValue < 0) {
     41     return false;
     42   }
     43   int high = 0xD800 | (biasedValue >> 10);
     44   int low = 0xDC00 | (biasedValue & 0x3FF);
     45   encoding(stream, high);
     46   encoding(stream, low);
     47   return true;
     48 }
     49 
     50 void EncodeToUtf16LE(std::ostream& stream, int ch) {
     51   if (!SplitUtf16HighChar(stream, &EncodeToUtf16LE, ch)) {
     52     stream << Byte(ch & 0xFF) << Byte(ch >> 8);
     53   }
     54 }
     55 
     56 void EncodeToUtf16BE(std::ostream& stream, int ch) {
     57   if (!SplitUtf16HighChar(stream, &EncodeToUtf16BE, ch)) {
     58     stream << Byte(ch >> 8) << Byte(ch & 0xFF);
     59   }
     60 }
     61 
     62 void EncodeToUtf32LE(std::ostream& stream, int ch) {
     63   stream << Byte(ch & 0xFF) << Byte((ch >> 8) & 0xFF) << Byte((ch >> 16) & 0xFF)
     64          << Byte((ch >> 24) & 0xFF);
     65 }
     66 
     67 void EncodeToUtf32BE(std::ostream& stream, int ch) {
     68   stream << Byte((ch >> 24) & 0xFF) << Byte((ch >> 16) & 0xFF)
     69          << Byte((ch >> 8) & 0xFF) << Byte(ch & 0xFF);
     70 }
     71 
     72 class EncodingTest : public HandlerTest {
     73  protected:
     74   void SetUpEncoding(EncodingFn encoding, bool declareEncoding) {
     75     if (declareEncoding) {
     76       encoding(m_yaml, 0xFEFF);
     77     }
     78 
     79     AddEntry(encoding, 0x0021, 0x007E);  // Basic Latin
     80     AddEntry(encoding, 0x00A1, 0x00FF);  // Latin-1 Supplement
     81     AddEntry(encoding, 0x0660, 0x06FF);  // Arabic (largest contiguous block)
     82 
     83     // CJK unified ideographs (multiple lines)
     84     AddEntry(encoding, 0x4E00, 0x4EFF);
     85     AddEntry(encoding, 0x4F00, 0x4FFF);
     86     AddEntry(encoding, 0x5000, 0x51FF);  // 512 character line
     87     AddEntry(encoding, 0x5200, 0x54FF);  // 768 character line
     88     AddEntry(encoding, 0x5500, 0x58FF);  // 1024 character line
     89 
     90     AddEntry(encoding, 0x103A0, 0x103C3);  // Old Persian
     91 
     92     m_yaml.seekg(0, std::ios::beg);
     93   }
     94 
     95   void Run() {
     96     InSequence sequence;
     97     EXPECT_CALL(handler, OnDocumentStart(_));
     98     EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block));
     99     for (std::size_t i = 0; i < m_entries.size(); i++) {
    100       EXPECT_CALL(handler, OnScalar(_, "!", 0, m_entries[i]));
    101     }
    102     EXPECT_CALL(handler, OnSequenceEnd());
    103     EXPECT_CALL(handler, OnDocumentEnd());
    104 
    105     Parse(m_yaml.str());
    106   }
    107 
    108  private:
    109   std::stringstream m_yaml;
    110   std::vector<std::string> m_entries;
    111 
    112   void AddEntry(EncodingFn encoding, int startCh, int endCh) {
    113     encoding(m_yaml, '-');
    114     encoding(m_yaml, ' ');
    115     encoding(m_yaml, '|');
    116     encoding(m_yaml, '\n');
    117     encoding(m_yaml, ' ');
    118     encoding(m_yaml, ' ');
    119 
    120     std::stringstream entry;
    121     for (int ch = startCh; ch <= endCh; ++ch) {
    122       encoding(m_yaml, ch);
    123       EncodeToUtf8(entry, ch);
    124     }
    125     encoding(m_yaml, '\n');
    126     EncodeToUtf8(entry, '\n');
    127 
    128     m_entries.push_back(entry.str());
    129   }
    130 };
    131 
    132 TEST_F(EncodingTest, UTF8_noBOM) {
    133   SetUpEncoding(&EncodeToUtf8, false);
    134   Run();
    135 }
    136 
    137 TEST_F(EncodingTest, UTF8_BOM) {
    138   SetUpEncoding(&EncodeToUtf8, true);
    139   Run();
    140 }
    141 
    142 TEST_F(EncodingTest, UTF16LE_noBOM) {
    143   SetUpEncoding(&EncodeToUtf16LE, false);
    144   Run();
    145 }
    146 
    147 TEST_F(EncodingTest, UTF16LE_BOM) {
    148   SetUpEncoding(&EncodeToUtf16LE, true);
    149   Run();
    150 }
    151 
    152 TEST_F(EncodingTest, UTF16BE_noBOM) {
    153   SetUpEncoding(&EncodeToUtf16BE, false);
    154   Run();
    155 }
    156 
    157 TEST_F(EncodingTest, UTF16BE_BOM) {
    158   SetUpEncoding(&EncodeToUtf16BE, true);
    159   Run();
    160 }
    161 
    162 TEST_F(EncodingTest, UTF32LE_noBOM) {
    163   SetUpEncoding(&EncodeToUtf32LE, false);
    164   Run();
    165 }
    166 
    167 TEST_F(EncodingTest, UTF32LE_BOM) {
    168   SetUpEncoding(&EncodeToUtf32LE, true);
    169   Run();
    170 }
    171 
    172 TEST_F(EncodingTest, UTF32BE_noBOM) {
    173   SetUpEncoding(&EncodeToUtf32BE, false);
    174   Run();
    175 }
    176 
    177 TEST_F(EncodingTest, UTF32BE_BOM) {
    178   SetUpEncoding(&EncodeToUtf32BE, true);
    179   Run();
    180 }
    181 }
    182 }