encoding-test.c++ (20860B)
1 // Copyright (c) 2017 Cloudflare, Inc. and contributors 2 // Licensed under the MIT License: 3 // 4 // Permission is hereby granted, free of charge, to any person obtaining a copy 5 // of this software and associated documentation files (the "Software"), to deal 6 // in the Software without restriction, including without limitation the rights 7 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 // copies of the Software, and to permit persons to whom the Software is 9 // furnished to do so, subject to the following conditions: 10 // 11 // The above copyright notice and this permission notice shall be included in 12 // all copies or substantial portions of the Software. 13 // 14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 // THE SOFTWARE. 21 22 #include "encoding.h" 23 #include <kj/test.h> 24 #include <stdint.h> 25 26 namespace kj { 27 namespace { 28 29 CappedArray<char, sizeof(char ) * 2 + 1> hex(byte i) { return kj::hex((uint8_t )i); } 30 CappedArray<char, sizeof(char ) * 2 + 1> hex(char i) { return kj::hex((uint8_t )i); } 31 CappedArray<char, sizeof(char16_t) * 2 + 1> hex(char16_t i) { return kj::hex((uint16_t)i); } 32 CappedArray<char, sizeof(char32_t) * 2 + 1> hex(char32_t i) { return kj::hex((uint32_t)i); } 33 CappedArray<char, sizeof(uint32_t) * 2 + 1> hex(wchar_t i) { return kj::hex((uint32_t)i); } 34 // Hexify chars correctly. 35 // 36 // TODO(cleanup): Should this go into string.h with the other definitions of hex()? 37 38 template <typename T, typename U> 39 void expectResImpl(EncodingResult<T> result, 40 ArrayPtr<const U> expected, 41 bool errors = false) { 42 if (errors) { 43 KJ_EXPECT(result.hadErrors); 44 } else { 45 KJ_EXPECT(!result.hadErrors); 46 } 47 48 KJ_EXPECT(result.size() == expected.size(), result.size(), expected.size()); 49 for (auto i: kj::zeroTo(kj::min(result.size(), expected.size()))) { 50 KJ_EXPECT(result[i] == expected[i], i, hex(result[i]), hex(expected[i])); 51 } 52 } 53 54 template <typename T, typename U, size_t s> 55 void expectRes(EncodingResult<T> result, 56 const U (&expected)[s], 57 bool errors = false) { 58 expectResImpl(kj::mv(result), arrayPtr(expected, s - 1), errors); 59 } 60 61 #if __cplusplus >= 202000L 62 template <typename T, size_t s> 63 void expectRes(EncodingResult<T> result, 64 const char8_t (&expected)[s], 65 bool errors = false) { 66 expectResImpl(kj::mv(result), arrayPtr(reinterpret_cast<const char*>(expected), s - 1), errors); 67 } 68 #endif 69 70 template <typename T, size_t s> 71 void expectRes(EncodingResult<T> result, 72 byte (&expected)[s], 73 bool errors = false) { 74 expectResImpl(kj::mv(result), arrayPtr<const byte>(expected, s), errors); 75 } 76 77 // Handy reference for surrogate pair edge cases: 78 // 79 // \ud800 -> \xed\xa0\x80 80 // \udc00 -> \xed\xb0\x80 81 // \udbff -> \xed\xaf\xbf 82 // \udfff -> \xed\xbf\xbf 83 84 KJ_TEST("encode UTF-8 to UTF-16") { 85 expectRes(encodeUtf16(u8"foo"), u"foo"); 86 expectRes(encodeUtf16(u8"Здравствуйте"), u"Здравствуйте"); 87 expectRes(encodeUtf16(u8"中国网络"), u"中国网络"); 88 expectRes(encodeUtf16(u8"😺☁☄🐵"), u"😺☁☄🐵"); 89 } 90 91 KJ_TEST("invalid UTF-8 to UTF-16") { 92 // Disembodied continuation byte. 93 expectRes(encodeUtf16("\x80"), u"\ufffd", true); 94 expectRes(encodeUtf16("f\xbfo"), u"f\ufffdo", true); 95 expectRes(encodeUtf16("f\xbf\x80\xb0o"), u"f\ufffdo", true); 96 97 // Missing continuation bytes. 98 expectRes(encodeUtf16("\xc2x"), u"\ufffdx", true); 99 expectRes(encodeUtf16("\xe0x"), u"\ufffdx", true); 100 expectRes(encodeUtf16("\xe0\xa0x"), u"\ufffdx", true); 101 expectRes(encodeUtf16("\xf0x"), u"\ufffdx", true); 102 expectRes(encodeUtf16("\xf0\x90x"), u"\ufffdx", true); 103 expectRes(encodeUtf16("\xf0\x90\x80x"), u"\ufffdx", true); 104 105 // Overlong sequences. 106 expectRes(encodeUtf16("\xc0\x80"), u"\ufffd", true); 107 expectRes(encodeUtf16("\xc1\xbf"), u"\ufffd", true); 108 expectRes(encodeUtf16("\xc2\x80"), u"\u0080", false); 109 expectRes(encodeUtf16("\xdf\xbf"), u"\u07ff", false); 110 111 expectRes(encodeUtf16("\xe0\x80\x80"), u"\ufffd", true); 112 expectRes(encodeUtf16("\xe0\x9f\xbf"), u"\ufffd", true); 113 expectRes(encodeUtf16("\xe0\xa0\x80"), u"\u0800", false); 114 expectRes(encodeUtf16("\xef\xbf\xbe"), u"\ufffe", false); 115 116 // Due to a classic off-by-one error, GCC 4.x rather hilariously encodes '\uffff' as the 117 // "surrogate pair" 0xd7ff, 0xdfff: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=41698 118 if (kj::size(u"\uffff") == 2) { 119 expectRes(encodeUtf16("\xef\xbf\xbf"), u"\uffff", false); 120 } 121 122 expectRes(encodeUtf16("\xf0\x80\x80\x80"), u"\ufffd", true); 123 expectRes(encodeUtf16("\xf0\x8f\xbf\xbf"), u"\ufffd", true); 124 expectRes(encodeUtf16("\xf0\x90\x80\x80"), u"\U00010000", false); 125 expectRes(encodeUtf16("\xf4\x8f\xbf\xbf"), u"\U0010ffff", false); 126 127 // Out of Unicode range. 128 expectRes(encodeUtf16("\xf5\x80\x80\x80"), u"\ufffd", true); 129 expectRes(encodeUtf16("\xf8\xbf\x80\x80\x80"), u"\ufffd", true); 130 expectRes(encodeUtf16("\xfc\xbf\x80\x80\x80\x80"), u"\ufffd", true); 131 expectRes(encodeUtf16("\xfe\xbf\x80\x80\x80\x80\x80"), u"\ufffd", true); 132 expectRes(encodeUtf16("\xff\xbf\x80\x80\x80\x80\x80\x80"), u"\ufffd", true); 133 134 // Surrogates encoded as separate UTF-8 code points are flagged as errors but allowed to decode 135 // to UTF-16 surrogate values. 136 expectRes(encodeUtf16("\xed\xb0\x80\xed\xaf\xbf"), u"\xdc00\xdbff", true); 137 expectRes(encodeUtf16("\xed\xbf\xbf\xed\xa0\x80"), u"\xdfff\xd800", true); 138 139 expectRes(encodeUtf16("\xed\xb0\x80\xed\xbf\xbf"), u"\xdc00\xdfff", true); 140 expectRes(encodeUtf16("f\xed\xa0\x80"), u"f\xd800", true); 141 expectRes(encodeUtf16("f\xed\xa0\x80x"), u"f\xd800x", true); 142 expectRes(encodeUtf16("f\xed\xa0\x80\xed\xa0\x80x"), u"f\xd800\xd800x", true); 143 144 // However, if successive UTF-8 codepoints decode to a proper surrogate pair, the second 145 // surrogate is replaced with the Unicode replacement character to avoid creating valid UTF-16. 146 expectRes(encodeUtf16("\xed\xa0\x80\xed\xbf\xbf"), u"\xd800\xfffd", true); 147 expectRes(encodeUtf16("\xed\xaf\xbf\xed\xb0\x80"), u"\xdbff\xfffd", true); 148 } 149 150 KJ_TEST("encode UTF-8 to UTF-32") { 151 expectRes(encodeUtf32(u8"foo"), U"foo"); 152 expectRes(encodeUtf32(u8"Здравствуйте"), U"Здравствуйте"); 153 expectRes(encodeUtf32(u8"中国网络"), U"中国网络"); 154 expectRes(encodeUtf32(u8"😺☁☄🐵"), U"😺☁☄🐵"); 155 } 156 157 KJ_TEST("invalid UTF-8 to UTF-32") { 158 // Disembodied continuation byte. 159 expectRes(encodeUtf32("\x80"), U"\ufffd", true); 160 expectRes(encodeUtf32("f\xbfo"), U"f\ufffdo", true); 161 expectRes(encodeUtf32("f\xbf\x80\xb0o"), U"f\ufffdo", true); 162 163 // Missing continuation bytes. 164 expectRes(encodeUtf32("\xc2x"), U"\ufffdx", true); 165 expectRes(encodeUtf32("\xe0x"), U"\ufffdx", true); 166 expectRes(encodeUtf32("\xe0\xa0x"), U"\ufffdx", true); 167 expectRes(encodeUtf32("\xf0x"), U"\ufffdx", true); 168 expectRes(encodeUtf32("\xf0\x90x"), U"\ufffdx", true); 169 expectRes(encodeUtf32("\xf0\x90\x80x"), U"\ufffdx", true); 170 171 // Overlong sequences. 172 expectRes(encodeUtf32("\xc0\x80"), U"\ufffd", true); 173 expectRes(encodeUtf32("\xc1\xbf"), U"\ufffd", true); 174 expectRes(encodeUtf32("\xc2\x80"), U"\u0080", false); 175 expectRes(encodeUtf32("\xdf\xbf"), U"\u07ff", false); 176 177 expectRes(encodeUtf32("\xe0\x80\x80"), U"\ufffd", true); 178 expectRes(encodeUtf32("\xe0\x9f\xbf"), U"\ufffd", true); 179 expectRes(encodeUtf32("\xe0\xa0\x80"), U"\u0800", false); 180 expectRes(encodeUtf32("\xef\xbf\xbf"), U"\uffff", false); 181 182 expectRes(encodeUtf32("\xf0\x80\x80\x80"), U"\ufffd", true); 183 expectRes(encodeUtf32("\xf0\x8f\xbf\xbf"), U"\ufffd", true); 184 expectRes(encodeUtf32("\xf0\x90\x80\x80"), U"\U00010000", false); 185 expectRes(encodeUtf32("\xf4\x8f\xbf\xbf"), U"\U0010ffff", false); 186 187 // Out of Unicode range. 188 expectRes(encodeUtf32("\xf5\x80\x80\x80"), U"\ufffd", true); 189 expectRes(encodeUtf32("\xf8\xbf\x80\x80\x80"), U"\ufffd", true); 190 expectRes(encodeUtf32("\xfc\xbf\x80\x80\x80\x80"), U"\ufffd", true); 191 expectRes(encodeUtf32("\xfe\xbf\x80\x80\x80\x80\x80"), U"\ufffd", true); 192 expectRes(encodeUtf32("\xff\xbf\x80\x80\x80\x80\x80\x80"), U"\ufffd", true); 193 } 194 195 KJ_TEST("decode UTF-16 to UTF-8") { 196 expectRes(decodeUtf16(u"foo"), u8"foo"); 197 expectRes(decodeUtf16(u"Здравствуйте"), u8"Здравствуйте"); 198 expectRes(decodeUtf16(u"中国网络"), u8"中国网络"); 199 expectRes(decodeUtf16(u"😺☁☄🐵"), u8"😺☁☄🐵"); 200 } 201 202 KJ_TEST("invalid UTF-16 to UTF-8") { 203 // Surrogates in wrong order. 204 expectRes(decodeUtf16(u"\xdc00\xdbff"), 205 "\xed\xb0\x80\xed\xaf\xbf", true); 206 expectRes(decodeUtf16(u"\xdfff\xd800"), 207 "\xed\xbf\xbf\xed\xa0\x80", true); 208 209 // Missing second surrogate. 210 expectRes(decodeUtf16(u"f\xd800"), "f\xed\xa0\x80", true); 211 expectRes(decodeUtf16(u"f\xd800x"), "f\xed\xa0\x80x", true); 212 expectRes(decodeUtf16(u"f\xd800\xd800x"), "f\xed\xa0\x80\xed\xa0\x80x", true); 213 } 214 215 KJ_TEST("decode UTF-32 to UTF-8") { 216 expectRes(decodeUtf32(U"foo"), u8"foo"); 217 expectRes(decodeUtf32(U"Здравствуйте"), u8"Здравствуйте"); 218 expectRes(decodeUtf32(U"中国网络"), u8"中国网络"); 219 expectRes(decodeUtf32(U"😺☁☄🐵"), u8"😺☁☄🐵"); 220 } 221 222 KJ_TEST("invalid UTF-32 to UTF-8") { 223 // Surrogates rejected. 224 expectRes(decodeUtf32(U"\xdfff\xd800"), 225 "\xed\xbf\xbf\xed\xa0\x80", true); 226 227 // Even if it would be a valid surrogate pair in UTF-16. 228 expectRes(decodeUtf32(U"\xd800\xdfff"), 229 "\xed\xa0\x80\xed\xbf\xbf", true); 230 } 231 232 KJ_TEST("round-trip invalid UTF-16") { 233 const char16_t INVALID[] = u"\xdfff foo \xd800\xdc00 bar \xdc00\xd800 baz \xdbff qux \xd800"; 234 235 expectRes(encodeUtf16(decodeUtf16(INVALID)), INVALID, true); 236 expectRes(encodeUtf16(decodeUtf32(encodeUtf32(decodeUtf16(INVALID)))), INVALID, true); 237 } 238 239 KJ_TEST("EncodingResult as a Maybe") { 240 KJ_IF_MAYBE(result, encodeUtf16("\x80")) { 241 KJ_FAIL_EXPECT("expected failure"); 242 } 243 244 KJ_IF_MAYBE(result, encodeUtf16("foo")) { 245 // good 246 } else { 247 KJ_FAIL_EXPECT("expected success"); 248 } 249 250 KJ_EXPECT(KJ_ASSERT_NONNULL(decodeUtf16(u"foo")) == "foo"); 251 } 252 253 KJ_TEST("encode to wchar_t") { 254 expectRes(encodeWideString(u8"foo"), L"foo"); 255 expectRes(encodeWideString(u8"Здравствуйте"), L"Здравствуйте"); 256 expectRes(encodeWideString(u8"中国网络"), L"中国网络"); 257 expectRes(encodeWideString(u8"😺☁☄🐵"), L"😺☁☄🐵"); 258 } 259 260 KJ_TEST("decode from wchar_t") { 261 expectRes(decodeWideString(L"foo"), u8"foo"); 262 expectRes(decodeWideString(L"Здравствуйте"), u8"Здравствуйте"); 263 expectRes(decodeWideString(L"中国网络"), u8"中国网络"); 264 expectRes(decodeWideString(L"😺☁☄🐵"), u8"😺☁☄🐵"); 265 } 266 267 // ======================================================================================= 268 269 KJ_TEST("hex encoding/decoding") { 270 byte bytes[] = {0x12, 0x34, 0xab, 0xf2}; 271 272 KJ_EXPECT(encodeHex(bytes) == "1234abf2"); 273 274 expectRes(decodeHex("1234abf2"), bytes); 275 276 expectRes(decodeHex("1234abf21"), bytes, true); 277 278 bytes[2] = 0xa0; 279 expectRes(decodeHex("1234axf2"), bytes, true); 280 281 bytes[2] = 0x0b; 282 expectRes(decodeHex("1234xbf2"), bytes, true); 283 } 284 285 constexpr char RFC2396_FRAGMENT_SET_DIFF[] = "#$&+,/:;=?@[\\]^{|}"; 286 // These are the characters reserved in RFC 2396, but not in the fragment percent encode set. 287 288 KJ_TEST("URI encoding/decoding") { 289 KJ_EXPECT(encodeUriComponent("foo") == "foo"); 290 KJ_EXPECT(encodeUriComponent("foo bar") == "foo%20bar"); 291 KJ_EXPECT(encodeUriComponent("\xab\xba") == "%AB%BA"); 292 KJ_EXPECT(encodeUriComponent(StringPtr("foo\0bar", 7)) == "foo%00bar"); 293 294 KJ_EXPECT(encodeUriComponent(RFC2396_FRAGMENT_SET_DIFF) == 295 "%23%24%26%2B%2C%2F%3A%3B%3D%3F%40%5B%5C%5D%5E%7B%7C%7D"); 296 297 // Encode characters reserved by application/x-www-form-urlencoded, but not by RFC 2396. 298 KJ_EXPECT(encodeUriComponent("'foo'! (~)") == "'foo'!%20(~)"); 299 300 expectRes(decodeUriComponent("foo%20bar"), "foo bar"); 301 expectRes(decodeUriComponent("%ab%BA"), "\xab\xba"); 302 303 expectRes(decodeUriComponent("foo%1xxx"), "foo\1xxx", true); 304 expectRes(decodeUriComponent("foo%1"), "foo\1", true); 305 expectRes(decodeUriComponent("foo%xxx"), "fooxxx", true); 306 expectRes(decodeUriComponent("foo%"), "foo", true); 307 308 { 309 byte bytes[] = {12, 34, 56}; 310 KJ_EXPECT(decodeBinaryUriComponent(encodeUriComponent(bytes)).asPtr() == bytes); 311 312 // decodeBinaryUriComponent() takes a DecodeUriOptions struct as its second parameter, but it 313 // once took a single `bool nulTerminate`. Verify that the old behavior still compiles and 314 // works. 315 auto bytesWithNul = decodeBinaryUriComponent(encodeUriComponent(bytes), true); 316 KJ_ASSERT(bytesWithNul.size() == 4); 317 KJ_EXPECT(bytesWithNul[3] == '\0'); 318 KJ_EXPECT(bytesWithNul.slice(0, 3) == bytes); 319 } 320 } 321 322 KJ_TEST("URL component encoding") { 323 KJ_EXPECT(encodeUriFragment("foo") == "foo"); 324 KJ_EXPECT(encodeUriFragment("foo bar") == "foo%20bar"); 325 KJ_EXPECT(encodeUriFragment("\xab\xba") == "%AB%BA"); 326 KJ_EXPECT(encodeUriFragment(StringPtr("foo\0bar", 7)) == "foo%00bar"); 327 328 KJ_EXPECT(encodeUriFragment(RFC2396_FRAGMENT_SET_DIFF) == RFC2396_FRAGMENT_SET_DIFF); 329 330 KJ_EXPECT(encodeUriPath("foo") == "foo"); 331 KJ_EXPECT(encodeUriPath("foo bar") == "foo%20bar"); 332 KJ_EXPECT(encodeUriPath("\xab\xba") == "%AB%BA"); 333 KJ_EXPECT(encodeUriPath(StringPtr("foo\0bar", 7)) == "foo%00bar"); 334 335 KJ_EXPECT(encodeUriPath(RFC2396_FRAGMENT_SET_DIFF) == "%23$&+,%2F:;=%3F@[%5C]^%7B|%7D"); 336 337 KJ_EXPECT(encodeUriUserInfo("foo") == "foo"); 338 KJ_EXPECT(encodeUriUserInfo("foo bar") == "foo%20bar"); 339 KJ_EXPECT(encodeUriUserInfo("\xab\xba") == "%AB%BA"); 340 KJ_EXPECT(encodeUriUserInfo(StringPtr("foo\0bar", 7)) == "foo%00bar"); 341 342 KJ_EXPECT(encodeUriUserInfo(RFC2396_FRAGMENT_SET_DIFF) == 343 "%23$&+,%2F%3A%3B%3D%3F%40%5B%5C%5D%5E%7B%7C%7D"); 344 345 // NOTE: None of these functions have explicit decode equivalents. 346 } 347 348 KJ_TEST("application/x-www-form-urlencoded encoding/decoding") { 349 KJ_EXPECT(encodeWwwForm("foo") == "foo"); 350 KJ_EXPECT(encodeWwwForm("foo bar") == "foo+bar"); 351 KJ_EXPECT(encodeWwwForm("\xab\xba") == "%AB%BA"); 352 KJ_EXPECT(encodeWwwForm(StringPtr("foo\0bar", 7)) == "foo%00bar"); 353 354 // Encode characters reserved by application/x-www-form-urlencoded, but not by RFC 2396. 355 KJ_EXPECT(encodeWwwForm("'foo'! (~)") == "%27foo%27%21+%28%7E%29"); 356 357 expectRes(decodeWwwForm("foo%20bar"), "foo bar"); 358 expectRes(decodeWwwForm("foo+bar"), "foo bar"); 359 expectRes(decodeWwwForm("%ab%BA"), "\xab\xba"); 360 361 expectRes(decodeWwwForm("foo%1xxx"), "foo\1xxx", true); 362 expectRes(decodeWwwForm("foo%1"), "foo\1", true); 363 expectRes(decodeWwwForm("foo%xxx"), "fooxxx", true); 364 expectRes(decodeWwwForm("foo%"), "foo", true); 365 366 { 367 byte bytes[] = {12, 34, 56}; 368 DecodeUriOptions options { /*.nulTerminate=*/false, /*.plusToSpace=*/true }; 369 KJ_EXPECT(decodeBinaryUriComponent(encodeWwwForm(bytes), options) == bytes); 370 } 371 } 372 373 KJ_TEST("C escape encoding/decoding") { 374 KJ_EXPECT(encodeCEscape("fooo\a\b\f\n\r\t\v\'\"\\barПривет, Мир! Ж=О") == 375 "fooo\\a\\b\\f\\n\\r\\t\\v\\\'\\\"\\\\bar\xd0\x9f\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82\x2c\x20\xd0\x9c\xd0\xb8\xd1\x80\x21\x20\xd0\x96\x3d\xd0\x9e"); 376 KJ_EXPECT(encodeCEscape("foo\x01\x7fxxx") == 377 "foo\\001\\177xxx"); 378 byte bytes[] = {'f', 'o', 'o', 0, '\x01', '\x7f', 'x', 'x', 'x', 128, 254, 255}; 379 KJ_EXPECT(encodeCEscape(bytes) == "foo\\000\\001\\177xxx\\200\\376\\377"); 380 381 expectRes(decodeCEscape("fooo\\a\\b\\f\\n\\r\\t\\v\\\'\\\"\\\\bar"), 382 "fooo\a\b\f\n\r\t\v\'\"\\bar"); 383 expectRes(decodeCEscape("foo\\x01\\x7fxxx"), "foo\x01\x7fxxx"); 384 expectRes(decodeCEscape("foo\\001\\177234"), "foo\001\177234"); 385 expectRes(decodeCEscape("foo\\x1"), "foo\x1"); 386 expectRes(decodeCEscape("foo\\1"), "foo\1"); 387 388 expectRes(decodeCEscape("foo\\u1234bar"), u8"foo\u1234bar"); 389 expectRes(decodeCEscape("foo\\U00045678bar"), u8"foo\U00045678bar"); 390 391 // Error cases. 392 expectRes(decodeCEscape("foo\\"), "foo", true); 393 expectRes(decodeCEscape("foo\\x123x"), u8"foo\x23x", true); 394 expectRes(decodeCEscape("foo\\u12"), u8"foo\u0012", true); 395 expectRes(decodeCEscape("foo\\u12xxx"), u8"foo\u0012xxx", true); 396 expectRes(decodeCEscape("foo\\U12"), u8"foo\u0012", true); 397 expectRes(decodeCEscape("foo\\U12xxxxxxxx"), u8"foo\u0012xxxxxxxx", true); 398 } 399 400 KJ_TEST("base64 encoding/decoding") { 401 { 402 auto encoded = encodeBase64(StringPtr("").asBytes(), false); 403 KJ_EXPECT(encoded == "", encoded, encoded.size()); 404 KJ_EXPECT(heapString(decodeBase64(encoded.asArray()).asChars()) == ""); 405 } 406 407 { 408 auto encoded = encodeBase64(StringPtr("foo").asBytes(), false); 409 KJ_EXPECT(encoded == "Zm9v", encoded, encoded.size()); 410 auto decoded = decodeBase64(encoded.asArray()); 411 KJ_EXPECT(!decoded.hadErrors); 412 KJ_EXPECT(heapString(decoded.asChars()) == "foo"); 413 } 414 415 { 416 auto encoded = encodeBase64(StringPtr("quux").asBytes(), false); 417 KJ_EXPECT(encoded == "cXV1eA==", encoded, encoded.size()); 418 KJ_EXPECT(heapString(decodeBase64(encoded.asArray()).asChars()) == "quux"); 419 } 420 421 { 422 auto encoded = encodeBase64(StringPtr("corge").asBytes(), false); 423 KJ_EXPECT(encoded == "Y29yZ2U=", encoded); 424 auto decoded = decodeBase64(encoded.asArray()); 425 KJ_EXPECT(!decoded.hadErrors); 426 KJ_EXPECT(heapString(decoded.asChars()) == "corge"); 427 } 428 429 { 430 auto decoded = decodeBase64("Y29yZ2U"); 431 KJ_EXPECT(!decoded.hadErrors); 432 KJ_EXPECT(heapString(decoded.asChars()) == "corge"); 433 } 434 435 { 436 auto decoded = decodeBase64("Y\n29y Z@2U=\n"); 437 KJ_EXPECT(decoded.hadErrors); // @-sign is invalid base64 input. 438 KJ_EXPECT(heapString(decoded.asChars()) == "corge"); 439 } 440 441 { 442 auto decoded = decodeBase64("Y\n29y Z2U=\n"); 443 KJ_EXPECT(!decoded.hadErrors); 444 KJ_EXPECT(heapString(decoded.asChars()) == "corge"); 445 } 446 447 // Too much padding. 448 KJ_EXPECT(decodeBase64("Y29yZ2U==").hadErrors); 449 KJ_EXPECT(decodeBase64("Y29yZ===").hadErrors); 450 451 // Non-terminal padding. 452 KJ_EXPECT(decodeBase64("ab=c").hadErrors); 453 454 { 455 auto encoded = encodeBase64(StringPtr("corge").asBytes(), true); 456 KJ_EXPECT(encoded == "Y29yZ2U=\n", encoded); 457 } 458 459 StringPtr fullLine = "012345678901234567890123456789012345678901234567890123"; 460 { 461 auto encoded = encodeBase64(fullLine.asBytes(), false); 462 KJ_EXPECT( 463 encoded == "MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nzg5MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIz", 464 encoded); 465 } 466 { 467 auto encoded = encodeBase64(fullLine.asBytes(), true); 468 KJ_EXPECT( 469 encoded == "MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nzg5MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIz\n", 470 encoded); 471 } 472 473 String multiLine = str(fullLine, "456"); 474 { 475 auto encoded = encodeBase64(multiLine.asBytes(), false); 476 KJ_EXPECT( 477 encoded == "MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nzg5MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2", 478 encoded); 479 } 480 { 481 auto encoded = encodeBase64(multiLine.asBytes(), true); 482 KJ_EXPECT( 483 encoded == "MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nzg5MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIz\n" 484 "NDU2\n", 485 encoded); 486 } 487 } 488 489 KJ_TEST("base64 url encoding") { 490 { 491 // Handles empty. 492 auto encoded = encodeBase64Url(StringPtr("").asBytes()); 493 KJ_EXPECT(encoded == "", encoded, encoded.size()); 494 } 495 496 { 497 // Handles paddingless encoding. 498 auto encoded = encodeBase64Url(StringPtr("foo").asBytes()); 499 KJ_EXPECT(encoded == "Zm9v", encoded, encoded.size()); 500 } 501 502 { 503 // Handles padded encoding. 504 auto encoded1 = encodeBase64Url(StringPtr("quux").asBytes()); 505 KJ_EXPECT(encoded1 == "cXV1eA", encoded1, encoded1.size()); 506 auto encoded2 = encodeBase64Url(StringPtr("corge").asBytes()); 507 KJ_EXPECT(encoded2 == "Y29yZ2U", encoded2, encoded2.size()); 508 } 509 510 { 511 // No line breaks. 512 StringPtr fullLine = "012345678901234567890123456789012345678901234567890123"; 513 auto encoded = encodeBase64Url(StringPtr(fullLine).asBytes()); 514 KJ_EXPECT( 515 encoded == "MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nzg5MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIz", 516 encoded); 517 } 518 519 { 520 // Replaces plusses. 521 const byte data[] = { 0b11111011, 0b11101111, 0b10111110 }; 522 auto encoded = encodeBase64Url(data); 523 KJ_EXPECT(encoded == "----", encoded, encoded.size(), data); 524 } 525 526 { 527 // Replaces slashes. 528 const byte data[] = { 0b11111111, 0b11111111, 0b11111111 }; 529 auto encoded = encodeBase64Url(data); 530 KJ_EXPECT(encoded == "____", encoded, encoded.size(), data); 531 } 532 } 533 534 } // namespace 535 } // namespace kj