encoding_test.cpp (4591B)
1 #include <sstream> 2 3 #include "handler_test.h" 4 #include "yaml-cpp/yaml.h" // IWYU pragma: keep 5 6 #include "gtest/gtest.h" 7 8 using ::testing::_; 9 using ::testing::InSequence; 10 11 namespace YAML { 12 namespace { 13 typedef void (*EncodingFn)(std::ostream&, int); 14 15 inline char Byte(int ch) { 16 return static_cast<char>( 17 static_cast<unsigned char>(static_cast<unsigned int>(ch))); 18 } 19 20 void EncodeToUtf8(std::ostream& stream, int ch) { 21 if (ch <= 0x7F) { 22 stream << Byte(ch); 23 } else if (ch <= 0x7FF) { 24 stream << Byte(0xC0 | (ch >> 6)); 25 stream << Byte(0x80 | (ch & 0x3F)); 26 } else if (ch <= 0xFFFF) { 27 stream << Byte(0xE0 | (ch >> 12)); 28 stream << Byte(0x80 | ((ch >> 6) & 0x3F)); 29 stream << Byte(0x80 | (ch & 0x3F)); 30 } else if (ch <= 0x1FFFFF) { 31 stream << Byte(0xF0 | (ch >> 18)); 32 stream << Byte(0x80 | ((ch >> 12) & 0x3F)); 33 stream << Byte(0x80 | ((ch >> 6) & 0x3F)); 34 stream << Byte(0x80 | (ch & 0x3F)); 35 } 36 } 37 38 bool SplitUtf16HighChar(std::ostream& stream, EncodingFn encoding, int ch) { 39 int biasedValue = ch - 0x10000; 40 if (biasedValue < 0) { 41 return false; 42 } 43 int high = 0xD800 | (biasedValue >> 10); 44 int low = 0xDC00 | (biasedValue & 0x3FF); 45 encoding(stream, high); 46 encoding(stream, low); 47 return true; 48 } 49 50 void EncodeToUtf16LE(std::ostream& stream, int ch) { 51 if (!SplitUtf16HighChar(stream, &EncodeToUtf16LE, ch)) { 52 stream << Byte(ch & 0xFF) << Byte(ch >> 8); 53 } 54 } 55 56 void EncodeToUtf16BE(std::ostream& stream, int ch) { 57 if (!SplitUtf16HighChar(stream, &EncodeToUtf16BE, ch)) { 58 stream << Byte(ch >> 8) << Byte(ch & 0xFF); 59 } 60 } 61 62 void EncodeToUtf32LE(std::ostream& stream, int ch) { 63 stream << Byte(ch & 0xFF) << Byte((ch >> 8) & 0xFF) << Byte((ch >> 16) & 0xFF) 64 << Byte((ch >> 24) & 0xFF); 65 } 66 67 void EncodeToUtf32BE(std::ostream& stream, int ch) { 68 stream << Byte((ch >> 24) & 0xFF) << Byte((ch >> 16) & 0xFF) 69 << Byte((ch >> 8) & 0xFF) << Byte(ch & 0xFF); 70 } 71 72 class EncodingTest : public HandlerTest { 73 protected: 74 void SetUpEncoding(EncodingFn encoding, bool declareEncoding) { 75 if (declareEncoding) { 76 encoding(m_yaml, 0xFEFF); 77 } 78 79 AddEntry(encoding, 0x0021, 0x007E); // Basic Latin 80 AddEntry(encoding, 0x00A1, 0x00FF); // Latin-1 Supplement 81 AddEntry(encoding, 0x0660, 0x06FF); // Arabic (largest contiguous block) 82 83 // CJK unified ideographs (multiple lines) 84 AddEntry(encoding, 0x4E00, 0x4EFF); 85 AddEntry(encoding, 0x4F00, 0x4FFF); 86 AddEntry(encoding, 0x5000, 0x51FF); // 512 character line 87 AddEntry(encoding, 0x5200, 0x54FF); // 768 character line 88 AddEntry(encoding, 0x5500, 0x58FF); // 1024 character line 89 90 AddEntry(encoding, 0x103A0, 0x103C3); // Old Persian 91 92 m_yaml.seekg(0, std::ios::beg); 93 } 94 95 void Run() { 96 InSequence sequence; 97 EXPECT_CALL(handler, OnDocumentStart(_)); 98 EXPECT_CALL(handler, OnSequenceStart(_, "?", 0, EmitterStyle::Block)); 99 for (std::size_t i = 0; i < m_entries.size(); i++) { 100 EXPECT_CALL(handler, OnScalar(_, "!", 0, m_entries[i])); 101 } 102 EXPECT_CALL(handler, OnSequenceEnd()); 103 EXPECT_CALL(handler, OnDocumentEnd()); 104 105 Parse(m_yaml.str()); 106 } 107 108 private: 109 std::stringstream m_yaml; 110 std::vector<std::string> m_entries; 111 112 void AddEntry(EncodingFn encoding, int startCh, int endCh) { 113 encoding(m_yaml, '-'); 114 encoding(m_yaml, ' '); 115 encoding(m_yaml, '|'); 116 encoding(m_yaml, '\n'); 117 encoding(m_yaml, ' '); 118 encoding(m_yaml, ' '); 119 120 std::stringstream entry; 121 for (int ch = startCh; ch <= endCh; ++ch) { 122 encoding(m_yaml, ch); 123 EncodeToUtf8(entry, ch); 124 } 125 encoding(m_yaml, '\n'); 126 EncodeToUtf8(entry, '\n'); 127 128 m_entries.push_back(entry.str()); 129 } 130 }; 131 132 TEST_F(EncodingTest, UTF8_noBOM) { 133 SetUpEncoding(&EncodeToUtf8, false); 134 Run(); 135 } 136 137 TEST_F(EncodingTest, UTF8_BOM) { 138 SetUpEncoding(&EncodeToUtf8, true); 139 Run(); 140 } 141 142 TEST_F(EncodingTest, UTF16LE_noBOM) { 143 SetUpEncoding(&EncodeToUtf16LE, false); 144 Run(); 145 } 146 147 TEST_F(EncodingTest, UTF16LE_BOM) { 148 SetUpEncoding(&EncodeToUtf16LE, true); 149 Run(); 150 } 151 152 TEST_F(EncodingTest, UTF16BE_noBOM) { 153 SetUpEncoding(&EncodeToUtf16BE, false); 154 Run(); 155 } 156 157 TEST_F(EncodingTest, UTF16BE_BOM) { 158 SetUpEncoding(&EncodeToUtf16BE, true); 159 Run(); 160 } 161 162 TEST_F(EncodingTest, UTF32LE_noBOM) { 163 SetUpEncoding(&EncodeToUtf32LE, false); 164 Run(); 165 } 166 167 TEST_F(EncodingTest, UTF32LE_BOM) { 168 SetUpEncoding(&EncodeToUtf32LE, true); 169 Run(); 170 } 171 172 TEST_F(EncodingTest, UTF32BE_noBOM) { 173 SetUpEncoding(&EncodeToUtf32BE, false); 174 Run(); 175 } 176 177 TEST_F(EncodingTest, UTF32BE_BOM) { 178 SetUpEncoding(&EncodeToUtf32BE, true); 179 Run(); 180 } 181 } 182 }