binary_to_compressed_c.cpp (13376B)
// dear imgui
// (binary_to_compressed_c.cpp)
// Helper tool to turn a file into a C array, if you want to embed font data in your source code.

// The data is first compressed with stb_compress() to reduce source code size,
// then encoded in Base85 to fit in a string so we can fit roughly 4 bytes of compressed data into 5 bytes of source code (suggested by @mmalex)
// (If we used 32-bit constants it would take 11 bytes of source code to encode 4 bytes, and be endianness dependent)
// Note that even with compression, the output array is likely to be bigger than the binary file..
// Load compressed TTF fonts with ImGui::GetIO().Fonts->AddFontFromMemoryCompressedTTF()

// Build with, e.g:
//   # cl.exe binary_to_compressed_c.cpp
//   # g++ binary_to_compressed_c.cpp
//   # clang++ binary_to_compressed_c.cpp
// You can also find a precompiled Windows binary in the binary/demo package available from https://github.com/ocornut/imgui

// Usage:
//   binary_to_compressed_c.exe [-base85] [-nocompress] <inputfile> <symbolname>
// Usage example:
//   # binary_to_compressed_c.exe myfont.ttf MyFont > myfont.cpp
//   # binary_to_compressed_c.exe -base85 myfont.ttf MyFont > myfont.cpp

#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>

// stb_compress* from stb.h - declaration
typedef unsigned int stb_uint;
typedef unsigned char stb_uchar;
stb_uint stb_compress(stb_uchar* out, stb_uchar* in, stb_uint len);

static bool binary_to_compressed_c(const char* filename, const char* symbol, bool use_base85_encoding, bool use_compression);

// Entry point: parses the optional flags, then converts <inputfile> into a C array named after <symbolname> on stdout.
// Returns 0 on success (or when only the syntax help was requested by passing too few arguments), 1 on any error.
int main(int argc, char** argv)
{
    if (argc < 3)
    {
        printf("Syntax: %s [-base85] [-nocompress] <inputfile> <symbolname>\n", argv[0]);
        return 0;
    }

    int argn = 1;
    bool use_base85_encoding = false;
    bool use_compression = true;

    // Both flags may be given, in any order.
    // The first argument is always examined as a potential flag. Later arguments are only examined
    // while at least two positional arguments would still remain after them, so an input file whose
    // name starts with '-' keeps working when it follows a flag.
    while (argn < argc && argv[argn][0] == '-' && (argn == 1 || argn < argc - 2))
    {
        if (strcmp(argv[argn], "-base85") == 0) { use_base85_encoding = true; argn++; }
        else if (strcmp(argv[argn], "-nocompress") == 0) { use_compression = false; argn++; }
        else
        {
            fprintf(stderr, "Unknown argument: '%s'\n", argv[argn]);
            return 1;
        }
    }

    // A flag may have consumed one of the three arguments we checked for above: make sure both
    // <inputfile> and <symbolname> are really present instead of reading argv[argc] (a null pointer).
    if (argc - argn < 2)
    {
        fprintf(stderr, "Syntax: %s [-base85] [-nocompress] <inputfile> <symbolname>\n", argv[0]);
        return 1;
    }

    bool ret = binary_to_compressed_c(argv[argn], argv[argn + 1], use_base85_encoding, use_compression);
    if (!ret)
        fprintf(stderr, "Error opening or reading file: '%s'\n", argv[argn]);
    return ret ? 0 : 1;
}

// Map the low base-85 digit of 'x' to a printable character.
// Digits start at character 35 ('#'); values at or above '\\' are shifted up by one so a backslash is never emitted.
char Encode85Byte(unsigned int x)
{
    x = (x % 85) + 35;
    return (char)((x >= '\\') ? x + 1 : x);
}

// Read 'filename', optionally compress it with stb_compress(), and print it to stdout as a C array
// whose identifier is derived from 'symbol'.
// - use_base85_encoding: emit a Base85 string literal instead of an array of 32-bit hexadecimal constants.
// - use_compression:     compress the data first (the emitted symbol then contains "compressed_").
// Returns false if the file cannot be opened/read or if compression produced no output, true otherwise.
bool binary_to_compressed_c(const char* filename, const char* symbol, bool use_base85_encoding, bool use_compression)
{
    // Read file
    FILE* f = fopen(filename, "rb");
    if (!f) return false;
    int data_sz;
    if (fseek(f, 0, SEEK_END) || (data_sz = (int)ftell(f)) < 0 || fseek(f, 0, SEEK_SET)) { fclose(f); return false; }
    char* data = new char[data_sz + 4];
    if (fread(data, 1, data_sz, f) != (size_t)data_sz) { fclose(f); delete[] data; return false; }
    memset((void*)(((char*)data) + data_sz), 0, 4); // Zero padding so the 4-byte reads below may run past data_sz
    fclose(f);

    // Compress
    int maxlen = data_sz + 512 + (data_sz >> 2) + sizeof(int); // total guess
    char* compressed = use_compression ? new char[maxlen] : data;
    int compressed_sz = use_compression ? stb_compress((stb_uchar*)compressed, (stb_uchar*)data, data_sz) : data_sz;
    if (use_compression && compressed_sz == 0)
    {
        // stb_compress() always writes a stream header on success, so an empty result means it failed
        // (its hash table allocation returned NULL). Do not emit an empty array as if everything went fine.
        fprintf(stderr, "Error compressing file: '%s'\n", filename);
        delete[] compressed;
        delete[] data;
        return false;
    }
    if (use_compression)
        memset(compressed + compressed_sz, 0, maxlen - compressed_sz); // Zero the tail for the same padding reason

    // Output as Base85 encoded
    FILE* out = stdout;
    fprintf(out, "// File: '%s' (%d bytes)\n", filename, (int)data_sz);
    fprintf(out, "// Exported using binary_to_compressed_c.cpp\n");
    const char* compressed_str = use_compression ? "compressed_" : "";
    if (use_base85_encoding)
    {
        fprintf(out, "static const char %s_%sdata_base85[%d+1] =\n \"", symbol, compressed_str, (int)((compressed_sz + 3) / 4)*5);
        char prev_c = 0;
        for (int src_i = 0; src_i < compressed_sz; src_i += 4)
        {
            // This is made a little more complicated by the fact that ??X sequences are interpreted as trigraphs by old C/C++ compilers. So we need to escape pairs of ??.
            // Copy the 4 bytes out with memcpy() rather than dereferencing a casted pointer into the char buffer.
            unsigned int d;
            memcpy(&d, compressed + src_i, sizeof(d));
            for (unsigned int n5 = 0; n5 < 5; n5++, d /= 85)
            {
                char c = Encode85Byte(d);
                fprintf(out, (c == '?' && prev_c == '?') ? "\\%c" : "%c", c);
                prev_c = c;
            }
            // Start a new string literal line every 112 source bytes
            if ((src_i % 112) == 112 - 4)
                fprintf(out, "\"\n \"");
        }
        fprintf(out, "\";\n\n");
    }
    else
    {
        fprintf(out, "static const unsigned int %s_%ssize = %d;\n", symbol, compressed_str, (int)compressed_sz);
        fprintf(out, "static const unsigned int %s_%sdata[%d/4] =\n{", symbol, compressed_str, (int)((compressed_sz + 3) / 4)*4);
        int column = 0;
        for (int i = 0; i < compressed_sz; i += 4)
        {
            unsigned int d;
            memcpy(&d, compressed + i, sizeof(d));
            // 12 constants per output line
            if ((column++ % 12) == 0)
                fprintf(out, "\n 0x%08x, ", d);
            else
                fprintf(out, "0x%08x, ", d);
        }
        fprintf(out, "\n};\n\n");
    }

    // Cleanup ('compressed' aliases 'data' when compression is disabled)
    delete[] data;
    if (use_compression)
        delete[] compressed;
    return true;
}

// stb_compress* from stb.h - definition

////////////////////           compressor         ///////////////////////

// Continue an Adler-32 checksum: 'adler32' is the running value, updated with 'buflen' bytes from 'buffer'.
// The sums are reduced modulo 65521 after each block of at most 5552 bytes.
static stb_uint stb_adler32(stb_uint adler32, stb_uchar *buffer, stb_uint buflen)
{
    const unsigned long ADLER_MOD = 65521;
    unsigned long s1 = adler32 & 0xffff, s2 = adler32 >> 16;
    unsigned long blocklen, i;

    // First block takes the remainder so that every following block is exactly 5552 bytes
    blocklen = buflen % 5552;
    while (buflen) {
        for (i=0; i + 7 < blocklen; i += 8) {
            s1 += buffer[0], s2 += s1;
            s1 += buffer[1], s2 += s1;
            s1 += buffer[2], s2 += s1;
            s1 += buffer[3], s2 += s1;
            s1 += buffer[4], s2 += s1;
            s1 += buffer[5], s2 += s1;
            s1 += buffer[6], s2 += s1;
            s1 += buffer[7], s2 += s1;

            buffer += 8;
        }

        for (; i < blocklen; ++i)
            s1 += *buffer++, s2 += s1;

        s1 %= ADLER_MOD, s2 %= ADLER_MOD;
        buflen -= blocklen;
        blocklen = 5552;
    }
    return (s2 << 16) + s1;
}

// Return the number of leading bytes (at most 'maxlen') that are equal in 'm1' and 'm2'.
static unsigned int stb_matchlen(stb_uchar *m1, stb_uchar *m2, stb_uint maxlen)
{
    stb_uint i;
    for (i=0; i < maxlen; ++i)
        if (m1[i] != m2[i]) return i;
    return i;
}

// simple implementation that just takes the source data in a big block

static stb_uchar *stb__out;     // Memory output cursor; when NULL, output goes to stb__outfile instead
static FILE      *stb__outfile;
static stb_uint   stb__outbytes;

// Write one byte to stb__outfile and count it.
static void stb__write(unsigned char v)
{
    fputc(v, stb__outfile);
    ++stb__outbytes;
}

//#define stb_out(v)    (stb__out ? *stb__out++ = (stb_uchar) (v) : stb__write((stb_uchar) (v)))
#define stb_out(v)    do { if (stb__out) *stb__out++ = (stb_uchar) (v); else stb__write((stb_uchar) (v)); } while (0)

// Emit the low 2/3/4 bytes of 'v', most significant byte first.
static void stb_out2(stb_uint v) { stb_out(v >> 8); stb_out(v); }
static void stb_out3(stb_uint v) { stb_out(v >> 16); stb_out(v >> 8); stb_out(v); }
static void stb_out4(stb_uint v) { stb_out(v >> 24); stb_out(v >> 16); stb_out(v >> 8 ); stb_out(v); }

// Emit 'numlit' literal bytes from 'in': a length opcode (sized by numlit) followed by the raw bytes.
// Runs longer than 65536 bytes are split into 65536-byte pieces.
static void outliterals(stb_uchar *in, int numlit)
{
    while (numlit > 65536) {
        outliterals(in,65536);
        in     += 65536;
        numlit -= 65536;
    }

    if      (numlit ==     0)    ;
    else if (numlit <=    32)    stb_out (0x000020 + numlit-1);
    else if (numlit <=  2048)    stb_out2(0x000800 + numlit-1);
    else /*  numlit <= 65536) */ stb_out3(0x070000 + numlit-1);

    if (stb__out) {
        memcpy(stb__out,in,numlit);
        stb__out += numlit;
    } else
        fwrite(in, 1, numlit, stb__outfile);
}

static int stb__window = 0x40000; // 256K

// Return non-zero when a match of length 'best' at distance 'dist' is long enough to be worth encoding.
static int stb_not_crap(int best, int dist)
{
    return   ((best > 2  &&  dist <= 0x00100)
           || (best > 5  &&  dist <= 0x04000)
           || (best > 7  &&  dist <= 0x80000));
}

static  stb_uint stb__hashsize = 32768;

// note that you can play with the hashing functions all you
// want without needing to change the decompressor
#define stb__hc(q,h,c)      (((h) << 7) + ((h) >> 25) + q[c])
#define stb__hc2(q,h,c,d)   (((h) << 14) + ((h) >> 18) + (q[c] << 7) + q[d])
#define stb__hc3(q,c,d,e)   ((q[c] << 14) + (q[d] << 7) + q[e])

static unsigned int stb__running_adler;

// Compress 'length' bytes beginning at 'start'; 'end' bounds how far matching/hashing may read.
// - pending_literals: in: number of not-yet-emitted literal bytes located just before 'start';
//                     out: number of literal bytes left pending at the end of this chunk.
// - chash/mask:       table of candidate match positions and the mask applied to hash values.
// Updates stb__running_adler with the bytes consumed and returns how many bytes were consumed.
static int stb_compress_chunk(stb_uchar *history,
    stb_uchar *start,
    stb_uchar *end,
    int length,
    int *pending_literals,
    stb_uchar **chash,
    stb_uint mask)
{
    (void)history;
    int window = stb__window;
    stb_uint match_max;
    stb_uchar *lit_start = start - *pending_literals;
    stb_uchar *q = start;

#define STB__SCRAMBLE(h)   (((h) + ((h) >> 16)) & mask)

    // stop short of the end so we don't scan off the end doing
    // the hashing; this means we won't compress the last few bytes
    // unless they were part of something longer
    while (q < start+length && q+12 < end) {
        int m;
        stb_uint h1,h2,h3,h4, h;
        stb_uchar *t;
        int best = 2, dist=0;

        if (q+65536 > end)
            match_max = end-q;
        else
            match_max = 65536;

#define stb__nc(b,d)  ((d) <= window && ((b) > 9 || stb_not_crap(b,d)))

#define STB__TRY(t,p)  /* avoid retrying a match we already tried */ \
    if (p ? dist != q-t : 1)                             \
    if ((m = stb_matchlen(t, q, match_max)) > best)     \
    if (stb__nc(m,q-(t)))                                \
    best = m, dist = q - (t)

        // rather than search for all matches, only try 4 candidate locations,
        // chosen based on 4 different hash functions of different lengths.
        // this strategy is inspired by LZO; hashing is unrolled here using the
        // 'hc' macro
        h = stb__hc3(q,0, 1, 2); h1 = STB__SCRAMBLE(h);
                                        t = chash[h1]; if (t) STB__TRY(t,0);
        h = stb__hc2(q,h, 3, 4); h2 = STB__SCRAMBLE(h);
        h = stb__hc2(q,h, 5, 6);        t = chash[h2]; if (t) STB__TRY(t,1);
        h = stb__hc2(q,h, 7, 8); h3 = STB__SCRAMBLE(h);
        h = stb__hc2(q,h, 9,10);        t = chash[h3]; if (t) STB__TRY(t,1);
        h = stb__hc2(q,h,11,12); h4 = STB__SCRAMBLE(h);
                                        t = chash[h4]; if (t) STB__TRY(t,1);

        // because we use a shared hash table, can only update it
        // _after_ we've probed all of them
        chash[h1] = chash[h2] = chash[h3] = chash[h4] = q;

        if (best > 2)
            assert(dist > 0);

        // see if our best match qualifies
        if (best < 3) { // fast path literals
            ++q;
        } else if (best > 2  &&  best <= 0x80    &&  dist <= 0x100) {
            outliterals(lit_start, q-lit_start); lit_start = (q += best);
            stb_out(0x80 + best-1);
            stb_out(dist-1);
        } else if (best > 5  &&  best <= 0x100   &&  dist <= 0x4000) {
            outliterals(lit_start, q-lit_start); lit_start = (q += best);
            stb_out2(0x4000 + dist-1);
            stb_out(best-1);
        } else if (best > 7  &&  best <= 0x100   &&  dist <= 0x80000) {
            outliterals(lit_start, q-lit_start); lit_start = (q += best);
            stb_out3(0x180000 + dist-1);
            stb_out(best-1);
        } else if (best > 8  &&  best <= 0x10000 &&  dist <= 0x80000) {
            outliterals(lit_start, q-lit_start); lit_start = (q += best);
            stb_out3(0x100000 + dist-1);
            stb_out2(best-1);
        } else if (best > 9                      &&  dist <= 0x1000000) {
            if (best > 65536) best = 65536;
            outliterals(lit_start, q-lit_start); lit_start = (q += best);
            if (best <= 0x100) {
                stb_out(0x06);
                stb_out3(dist-1);
                stb_out(best-1);
            } else {
                stb_out(0x04);
                stb_out3(dist-1);
                stb_out2(best-1);
            }
        } else {  // fallback literals if no match was a balanced tradeoff
            ++q;
        }
    }

    // if we didn't get all the way, add the rest to literals
    if (q-start < length)
        q = start+length;

    // the literals are everything from lit_start to q
    *pending_literals = (q - lit_start);

    stb__running_adler = stb_adler32(stb__running_adler, start, q - start);
    return q - start;
}

// Write a complete compressed stream for 'input' (signature, sizes, compressed body, end opcode, checksum)
// through stb_out(). Returns 1 on success, 0 if the hash table could not be allocated (nothing is written then).
static int stb_compress_inner(stb_uchar *input, stb_uint length)
{
    int literals = 0;
    stb_uint len,i;

    stb_uchar **chash;
    chash = (stb_uchar**) malloc(stb__hashsize * sizeof(stb_uchar*));
    if (chash == NULL) return 0; // failure
    for (i=0; i < stb__hashsize; ++i)
        chash[i] = NULL;

    // stream signature
    stb_out(0x57); stb_out(0xbc);
    stb_out2(0);

    stb_out4(0);       // 64-bit length requires 32-bit leading 0
    stb_out4(length);
    stb_out4(stb__window);

    stb__running_adler = 1;

    len = stb_compress_chunk(input, input, input+length, length, &literals, chash, stb__hashsize-1);
    assert(len == length);

    // flush whatever literal bytes the chunk left pending
    outliterals(input+length - literals, literals);

    free(chash);

    stb_out2(0x05fa); // end opcode

    stb_out4(stb__running_adler);

    return 1; // success
}

// Compress 'length' bytes of 'input' into the memory buffer 'out' and return the number of bytes written.
// The caller must provide a large enough 'out' buffer. Returns 0 when stb_compress_inner() fails, since
// nothing has been written in that case.
stb_uint stb_compress(stb_uchar *out, stb_uchar *input, stb_uint length)
{
    stb__out = out;
    stb__outfile = NULL;

    stb_compress_inner(input, length);

    return stb__out - out;
}