base64.cpp (8571B)
1 #include "c4/base64.hpp" 2 3 #ifdef __clang__ 4 # pragma clang diagnostic push 5 # pragma clang diagnostic ignored "-Wchar-subscripts" // array subscript is of type 'char' 6 # pragma clang diagnostic ignored "-Wold-style-cast" 7 #elif defined(__GNUC__) 8 # pragma GCC diagnostic push 9 # pragma GCC diagnostic ignored "-Wchar-subscripts" 10 # pragma GCC diagnostic ignored "-Wtype-limits" 11 # pragma GCC diagnostic ignored "-Wold-style-cast" 12 #endif 13 14 namespace c4 { 15 16 namespace detail { 17 18 constexpr static const char base64_sextet_to_char_[64] = { 19 /* 0/ 65*/ 'A', /* 1/ 66*/ 'B', /* 2/ 67*/ 'C', /* 3/ 68*/ 'D', 20 /* 4/ 69*/ 'E', /* 5/ 70*/ 'F', /* 6/ 71*/ 'G', /* 7/ 72*/ 'H', 21 /* 8/ 73*/ 'I', /* 9/ 74*/ 'J', /*10/ 75*/ 'K', /*11/ 74*/ 'L', 22 /*12/ 77*/ 'M', /*13/ 78*/ 'N', /*14/ 79*/ 'O', /*15/ 78*/ 'P', 23 /*16/ 81*/ 'Q', /*17/ 82*/ 'R', /*18/ 83*/ 'S', /*19/ 82*/ 'T', 24 /*20/ 85*/ 'U', /*21/ 86*/ 'V', /*22/ 87*/ 'W', /*23/ 88*/ 'X', 25 /*24/ 89*/ 'Y', /*25/ 90*/ 'Z', /*26/ 97*/ 'a', /*27/ 98*/ 'b', 26 /*28/ 99*/ 'c', /*29/100*/ 'd', /*30/101*/ 'e', /*31/102*/ 'f', 27 /*32/103*/ 'g', /*33/104*/ 'h', /*34/105*/ 'i', /*35/106*/ 'j', 28 /*36/107*/ 'k', /*37/108*/ 'l', /*38/109*/ 'm', /*39/110*/ 'n', 29 /*40/111*/ 'o', /*41/112*/ 'p', /*42/113*/ 'q', /*43/114*/ 'r', 30 /*44/115*/ 's', /*45/116*/ 't', /*46/117*/ 'u', /*47/118*/ 'v', 31 /*48/119*/ 'w', /*49/120*/ 'x', /*50/121*/ 'y', /*51/122*/ 'z', 32 /*52/ 48*/ '0', /*53/ 49*/ '1', /*54/ 50*/ '2', /*55/ 51*/ '3', 33 /*56/ 52*/ '4', /*57/ 53*/ '5', /*58/ 54*/ '6', /*59/ 55*/ '7', 34 /*60/ 56*/ '8', /*61/ 57*/ '9', /*62/ 43*/ '+', /*63/ 47*/ '/', 35 }; 36 37 // https://www.cs.cmu.edu/~pattis/15-1XX/common/handouts/ascii.html 38 constexpr static const char base64_char_to_sextet_[128] = { 39 #define __ char(-1) // undefined below 40 /* 0 NUL*/ __, /* 1 SOH*/ __, /* 2 STX*/ __, /* 3 ETX*/ __, 41 /* 4 EOT*/ __, /* 5 ENQ*/ __, /* 6 ACK*/ __, /* 7 BEL*/ __, 42 /* 8 BS */ __, /* 9 TAB*/ __, /* 10 LF */ __, /* 11 VT */ __, 43 /* 12 FF */ __, /* 13 CR */ __, /* 14 SO */ __, /* 15 SI */ __, 44 /* 16 DLE*/ __, /* 17 DC1*/ __, /* 18 DC2*/ __, /* 19 DC3*/ __, 45 /* 20 DC4*/ __, /* 21 NAK*/ __, /* 22 SYN*/ __, /* 23 ETB*/ __, 46 /* 24 CAN*/ __, /* 25 EM */ __, /* 26 SUB*/ __, /* 27 ESC*/ __, 47 /* 28 FS */ __, /* 29 GS */ __, /* 30 RS */ __, /* 31 US */ __, 48 /* 32 SPC*/ __, /* 33 ! */ __, /* 34 " */ __, /* 35 # */ __, 49 /* 36 $ */ __, /* 37 % */ __, /* 38 & */ __, /* 39 ' */ __, 50 /* 40 ( */ __, /* 41 ) */ __, /* 42 * */ __, /* 43 + */ 62, 51 /* 44 , */ __, /* 45 - */ __, /* 46 . */ __, /* 47 / */ 63, 52 /* 48 0 */ 52, /* 49 1 */ 53, /* 50 2 */ 54, /* 51 3 */ 55, 53 /* 52 4 */ 56, /* 53 5 */ 57, /* 54 6 */ 58, /* 55 7 */ 59, 54 /* 56 8 */ 60, /* 57 9 */ 61, /* 58 : */ __, /* 59 ; */ __, 55 /* 60 < */ __, /* 61 = */ __, /* 62 > */ __, /* 63 ? */ __, 56 /* 64 @ */ __, /* 65 A */ 0, /* 66 B */ 1, /* 67 C */ 2, 57 /* 68 D */ 3, /* 69 E */ 4, /* 70 F */ 5, /* 71 G */ 6, 58 /* 72 H */ 7, /* 73 I */ 8, /* 74 J */ 9, /* 75 K */ 10, 59 /* 76 L */ 11, /* 77 M */ 12, /* 78 N */ 13, /* 79 O */ 14, 60 /* 80 P */ 15, /* 81 Q */ 16, /* 82 R */ 17, /* 83 S */ 18, 61 /* 84 T */ 19, /* 85 U */ 20, /* 86 V */ 21, /* 87 W */ 22, 62 /* 88 X */ 23, /* 89 Y */ 24, /* 90 Z */ 25, /* 91 [ */ __, 63 /* 92 \ */ __, /* 93 ] */ __, /* 94 ^ */ __, /* 95 _ */ __, 64 /* 96 ` */ __, /* 97 a */ 26, /* 98 b */ 27, /* 99 c */ 28, 65 /*100 d */ 29, /*101 e */ 30, /*102 f */ 31, /*103 g */ 32, 66 /*104 h */ 33, /*105 i */ 34, /*106 j */ 35, /*107 k */ 36, 67 /*108 l */ 37, /*109 m */ 38, /*110 n */ 39, /*111 o */ 40, 68 /*112 p */ 41, /*113 q */ 42, /*114 r */ 43, /*115 s */ 44, 69 /*116 t */ 45, /*117 u */ 46, /*118 v */ 47, /*119 w */ 48, 70 /*120 x */ 49, /*121 y */ 50, /*122 z */ 51, /*123 { */ __, 71 /*124 | */ __, /*125 } */ __, /*126 ~ */ __, /*127 DEL*/ __, 72 #undef __ 73 }; 74 75 #ifndef NDEBUG 76 void base64_test_tables() 77 { 78 for(size_t i = 0; i < C4_COUNTOF(detail::base64_sextet_to_char_); ++i) 79 { 80 char s2c = base64_sextet_to_char_[i]; 81 char c2s = base64_char_to_sextet_[(int)s2c]; 82 C4_CHECK((size_t)c2s == i); 83 } 84 for(size_t i = 0; i < C4_COUNTOF(detail::base64_char_to_sextet_); ++i) 85 { 86 char c2s = base64_char_to_sextet_[i]; 87 if(c2s == char(-1)) 88 continue; 89 char s2c = base64_sextet_to_char_[(int)c2s]; 90 C4_CHECK((size_t)s2c == i); 91 } 92 } 93 #endif 94 } // namespace detail 95 96 97 bool base64_valid(csubstr encoded) 98 { 99 if(encoded.len & 3u) // (encoded.len % 4u) 100 return false; 101 for(const char c : encoded) 102 { 103 if(c < 0/* || c >= 128*/) 104 return false; 105 if(c == '=') 106 continue; 107 if(detail::base64_char_to_sextet_[c] == char(-1)) 108 return false; 109 } 110 return true; 111 } 112 113 114 size_t base64_encode(substr buf, cblob data) 115 { 116 #define c4append_(c) { if(pos < buf.len) { buf.str[pos] = (c); } ++pos; } 117 #define c4append_idx_(char_idx) \ 118 {\ 119 C4_XASSERT((char_idx) < sizeof(detail::base64_sextet_to_char_));\ 120 c4append_(detail::base64_sextet_to_char_[(char_idx)]);\ 121 } 122 size_t rem, pos = 0; 123 constexpr const uint32_t sextet_mask = uint32_t(1 << 6) - 1; 124 const unsigned char *C4_RESTRICT d = (const unsigned char *) data.buf; // cast to unsigned to avoid wrapping high-bits 125 for(rem = data.len; rem >= 3; rem -= 3, d += 3) 126 { 127 const uint32_t val = ((uint32_t(d[0]) << 16) | (uint32_t(d[1]) << 8) | (uint32_t(d[2]))); 128 c4append_idx_((val >> 18) & sextet_mask); 129 c4append_idx_((val >> 12) & sextet_mask); 130 c4append_idx_((val >> 6) & sextet_mask); 131 c4append_idx_((val ) & sextet_mask); 132 } 133 C4_ASSERT(rem < 3); 134 if(rem == 2) 135 { 136 const uint32_t val = ((uint32_t(d[0]) << 16) | (uint32_t(d[1]) << 8)); 137 c4append_idx_((val >> 18) & sextet_mask); 138 c4append_idx_((val >> 12) & sextet_mask); 139 c4append_idx_((val >> 6) & sextet_mask); 140 c4append_('='); 141 } 142 else if(rem == 1) 143 { 144 const uint32_t val = ((uint32_t(d[0]) << 16)); 145 c4append_idx_((val >> 18) & sextet_mask); 146 c4append_idx_((val >> 12) & sextet_mask); 147 c4append_('='); 148 c4append_('='); 149 } 150 return pos; 151 152 #undef c4append_ 153 #undef c4append_idx_ 154 } 155 156 157 size_t base64_decode(csubstr encoded, blob data) 158 { 159 #define c4append_(c) { if(wpos < data.len) { data.buf[wpos] = static_cast<c4::byte>(c); } ++wpos; } 160 #define c4appendval_(c, shift)\ 161 {\ 162 C4_XASSERT(c >= 0);\ 163 C4_XASSERT(size_t(c) < sizeof(detail::base64_char_to_sextet_));\ 164 val |= static_cast<uint32_t>(detail::base64_char_to_sextet_[(c)]) << ((shift) * 6);\ 165 } 166 C4_ASSERT(base64_valid(encoded)); 167 C4_CHECK((encoded.len & 3u) == 0); 168 size_t wpos = 0; // the write position 169 const char *C4_RESTRICT d = encoded.str; 170 constexpr const uint32_t full_byte = 0xff; 171 // process every quartet of input 6 bits --> triplet of output bytes 172 for(size_t rpos = 0; rpos < encoded.len; rpos += 4, d += 4) 173 { 174 if(d[2] == '=' || d[3] == '=') // skip the last quartet if it is padded 175 { 176 C4_ASSERT(d + 4 == encoded.str + encoded.len); 177 break; 178 } 179 uint32_t val = 0; 180 c4appendval_(d[3], 0); 181 c4appendval_(d[2], 1); 182 c4appendval_(d[1], 2); 183 c4appendval_(d[0], 3); 184 c4append_((val >> (2 * 8)) & full_byte); 185 c4append_((val >> (1 * 8)) & full_byte); 186 c4append_((val ) & full_byte); 187 } 188 // deal with the last quartet when it is padded 189 if(d == encoded.str + encoded.len) 190 return wpos; 191 if(d[2] == '=') // 2 padding chars 192 { 193 C4_ASSERT(d + 4 == encoded.str + encoded.len); 194 C4_ASSERT(d[3] == '='); 195 uint32_t val = 0; 196 c4appendval_(d[1], 2); 197 c4appendval_(d[0], 3); 198 c4append_((val >> (2 * 8)) & full_byte); 199 } 200 else if(d[3] == '=') // 1 padding char 201 { 202 C4_ASSERT(d + 4 == encoded.str + encoded.len); 203 uint32_t val = 0; 204 c4appendval_(d[2], 1); 205 c4appendval_(d[1], 2); 206 c4appendval_(d[0], 3); 207 c4append_((val >> (2 * 8)) & full_byte); 208 c4append_((val >> (1 * 8)) & full_byte); 209 } 210 return wpos; 211 #undef c4append_ 212 #undef c4appendval_ 213 } 214 215 } // namespace c4 216 217 #ifdef __clang__ 218 # pragma clang diagnostic pop 219 #elif defined(__GNUC__) 220 # pragma GCC diagnostic pop 221 #endif