djxl_fuzzer_corpus.cc (16659B)
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include <jxl/types.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>

#include <cstring>
#include <string>
#include <thread>
#include <utility>

#include "lib/extras/packed_image.h"
#include "lib/jxl/base/common.h"
#include "lib/jxl/base/status.h"
#include "lib/jxl/color_encoding_internal.h"
#include "lib/jxl/frame_header.h"
#include "lib/jxl/image_bundle.h"
#include "lib/jxl/modular/options.h"
#if defined(_WIN32) || defined(_WIN64)
#include "third_party/dirent.h"
#else
#endif

#include <functional>
#include <iostream>
#include <mutex>
#include <random>
#include <vector>

#include "lib/extras/enc/encode.h"
#include "lib/extras/enc/jpg.h"
#include "lib/jxl/base/data_parallel.h"
#include "lib/jxl/base/override.h"
#include "lib/jxl/base/span.h"
#include "lib/jxl/codec_in_out.h"
#include "lib/jxl/enc_ans.h"
#include "lib/jxl/enc_external_image.h"
#include "lib/jxl/enc_params.h"
#include "lib/jxl/encode_internal.h"
#include "lib/jxl/jpeg/enc_jpeg_data.h"
#include "lib/jxl/test_utils.h"  // TODO(eustas): cut this dependency
#include "tools/file_io.h"
#include "tools/thread_pool_internal.h"

namespace {

// Upper bounds that ImageSpec::Validate() enforces on a generated image.
const size_t kMaxWidth = 50000;
const size_t kMaxHeight = 50000;
const size_t kMaxPixels = 20 * (1 << 20);  // 20 MP
const size_t kMaxBitDepth = 24;  // The maximum reasonable bit depth supported.

// Serializes the progress messages that GenerateFile() writes to std::cerr.
std::mutex stderr_mutex;

// Callable that yields the next sample byte used to fill an image buffer.
typedef std::function<uint8_t()> PixelGenerator;

// ImageSpec needs to be a packed struct to allow us to use the raw memory of
// the struct for hashing to create a consistent file name.
63 #pragma pack(push, 1) 64 struct ImageSpec { 65 bool Validate() const { 66 if (width > kMaxWidth || height > kMaxHeight || 67 width * height > kMaxPixels) { 68 return false; 69 } 70 if (bit_depth > kMaxBitDepth || bit_depth == 0) return false; 71 if (num_frames == 0) return false; 72 // JPEG doesn't support all formats, so reconstructible JPEG isn't always 73 // valid. 74 if (is_reconstructible_jpeg && (bit_depth != 8 || num_channels != 3 || 75 alpha_bit_depth != 0 || num_frames != 1)) 76 return false; 77 return true; 78 } 79 80 friend std::ostream& operator<<(std::ostream& o, const ImageSpec& spec) { 81 o << "ImageSpec<" 82 << "size=" << spec.width << "x" << spec.height 83 << " * chan=" << spec.num_channels << " depth=" << spec.bit_depth 84 << " alpha=" << spec.alpha_bit_depth 85 << " (premult=" << spec.alpha_is_premultiplied 86 << ") x frames=" << spec.num_frames << " seed=" << spec.seed 87 << ", speed=" << static_cast<int>(spec.params.speed_tier) 88 << ", butteraugli=" << spec.params.butteraugli_distance 89 << ", modular_mode=" << spec.params.modular_mode 90 << ", lossy_palette=" << spec.params.lossy_palette 91 << ", noise=" << spec.params.noise << ", preview=" << spec.params.preview 92 << ", fuzzer_friendly=" << spec.fuzzer_friendly 93 << ", is_reconstructible_jpeg=" << spec.is_reconstructible_jpeg 94 << ", orientation=" << static_cast<int>(spec.orientation) << ">"; 95 return o; 96 } 97 98 void SpecHash(uint8_t hash[16]) const { 99 const uint8_t* from = reinterpret_cast<const uint8_t*>(this); 100 std::seed_seq hasher(from, from + sizeof(*this)); 101 uint32_t* to = reinterpret_cast<uint32_t*>(hash); 102 hasher.generate(to, to + 4); 103 } 104 105 uint64_t width = 256; 106 uint64_t height = 256; 107 // Number of channels *not* including alpha. 108 uint64_t num_channels = 3; 109 uint64_t bit_depth = 8; 110 // Bit depth for the alpha channel. A value of 0 means no alpha channel. 
111 uint64_t alpha_bit_depth = 8; 112 int32_t alpha_is_premultiplied = JXL_FALSE; 113 114 // Whether the ANS fuzzer friendly setting is currently enabled. 115 uint32_t fuzzer_friendly = JXL_FALSE; 116 117 // Number of frames, all the frames will have the same size. 118 uint64_t num_frames = 1; 119 120 // The seed for the PRNG. 121 uint32_t seed = 7777; 122 123 // Flags used for compression. These are mapped to the CompressedParams. 124 struct CjxlParams { 125 float butteraugli_distance = 1.f; 126 // Must not use Weighted - see force_no_wp 127 jxl::Predictor modular_predictor = jxl::Predictor::Gradient; 128 jxl::ColorTransform color_transform = jxl::ColorTransform::kXYB; 129 jxl::SpeedTier speed_tier = jxl::SpeedTier::kTortoise; 130 bool modular_mode = false; 131 bool lossy_palette = false; 132 bool noise = false; 133 bool preview = false; 134 // CjxlParams is packed; re-add padding when sum of sizes of members is not 135 // multiple of 4. 136 // uint8_t padding_[0] = {}; 137 } params; 138 139 uint32_t is_reconstructible_jpeg = JXL_FALSE; 140 // Use 0xFFFFFFFF if any random spec is good; otherwise set the desired value. 141 uint32_t override_decoder_spec = 0xFFFFFFFF; 142 // Orientation. 143 uint8_t orientation = 0; 144 uint8_t padding_[3] = {}; 145 }; 146 #pragma pack(pop) 147 static_assert(sizeof(ImageSpec) % 4 == 0, "Add padding to ImageSpec."); 148 149 bool GenerateFile(const char* output_dir, const ImageSpec& spec, 150 bool regenerate, bool quiet) { 151 // Compute a checksum of the ImageSpec to name the file. This is just to keep 152 // the output of this program repeatable. 
153 uint8_t checksum[16]; 154 spec.SpecHash(checksum); 155 std::string hash_str(sizeof(checksum) * 2, ' '); 156 static const char* hex_chars = "0123456789abcdef"; 157 for (size_t i = 0; i < sizeof(checksum); i++) { 158 hash_str[2 * i] = hex_chars[checksum[i] >> 4]; 159 hash_str[2 * i + 1] = hex_chars[checksum[i] % 0x0f]; 160 } 161 std::string output_fn = std::string(output_dir) + "/" + hash_str + ".jxl"; 162 163 // Don't regenerate files if they already exist on disk to speed-up 164 // consecutive calls when --regenerate is not used. 165 struct stat st; 166 if (!regenerate && stat(output_fn.c_str(), &st) == 0 && S_ISREG(st.st_mode)) { 167 return true; 168 } 169 170 if (!quiet) { 171 std::unique_lock<std::mutex> lock(stderr_mutex); 172 std::cerr << "Generating " << spec << " as " << hash_str << std::endl; 173 } 174 175 jxl::CodecInOut io; 176 if (spec.bit_depth == 32) { 177 io.metadata.m.SetFloat32Samples(); 178 } else { 179 io.metadata.m.SetUintSamples(spec.bit_depth); 180 } 181 io.metadata.m.SetAlphaBits(spec.alpha_bit_depth, 182 FROM_JXL_BOOL(spec.alpha_is_premultiplied)); 183 io.metadata.m.orientation = spec.orientation; 184 io.frames.clear(); 185 io.frames.reserve(spec.num_frames); 186 187 jxl::ColorEncoding c; 188 if (spec.num_channels == 1) { 189 c = jxl::ColorEncoding::LinearSRGB(true); 190 } else if (spec.num_channels == 3) { 191 c = jxl::ColorEncoding::SRGB(); 192 } 193 194 uint8_t hash[16]; 195 spec.SpecHash(hash); 196 std::mt19937 mt(spec.seed); 197 198 // Compress the image. 199 jxl::PaddedBytes compressed; 200 201 std::uniform_int_distribution<> dis(1, 6); 202 PixelGenerator gen = [&]() -> uint8_t { return dis(mt); }; 203 204 jxl::extras::PackedPixelFile ppf; 205 ppf.info.xsize = spec.width; 206 ppf.info.ysize = spec.height; 207 ppf.info.num_color_channels = spec.num_channels ? 
1 : 3; 208 ppf.info.bits_per_sample = spec.bit_depth; 209 for (uint32_t frame = 0; frame < spec.num_frames; frame++) { 210 jxl::ImageBundle ib(&io.metadata.m); 211 const bool has_alpha = (spec.alpha_bit_depth != 0); 212 const int alpha_channels = (has_alpha ? 1 : 0); 213 const size_t bytes_per_sample = 214 jxl::DivCeil(io.metadata.m.bit_depth.bits_per_sample, 8); 215 const size_t bytes_per_pixel = 216 bytes_per_sample * 217 (io.metadata.m.color_encoding.Channels() + alpha_channels); 218 const size_t row_size = spec.width * bytes_per_pixel; 219 std::vector<uint8_t> img_data(row_size * spec.height, 0); 220 for (size_t y = 0; y < spec.height; y++) { 221 size_t pos = row_size * y; 222 for (size_t x = 0; x < spec.width; x++) { 223 for (size_t b = 0; b < bytes_per_pixel; b++) { 224 img_data[pos++] = gen(); 225 } 226 } 227 } 228 uint32_t num_channels = bytes_per_pixel / bytes_per_sample; 229 JxlDataType data_type = 230 bytes_per_sample == 1 ? JXL_TYPE_UINT8 : JXL_TYPE_UINT16; 231 JxlPixelFormat format = {num_channels, data_type, JXL_LITTLE_ENDIAN, 0}; 232 const jxl::Span<const uint8_t> span(img_data.data(), img_data.size()); 233 JXL_RETURN_IF_ERROR(ConvertFromExternal( 234 span, spec.width, spec.height, io.metadata.m.color_encoding, 235 io.metadata.m.bit_depth.bits_per_sample, format, nullptr, &ib)); 236 io.frames.push_back(std::move(ib)); 237 jxl::extras::PackedFrame packed_frame(spec.width, spec.height, format); 238 JXL_ASSERT(packed_frame.color.pixels_size == img_data.size()); 239 memcpy(packed_frame.color.pixels(0, 0, 0), img_data.data(), 240 img_data.size()); 241 ppf.frames.emplace_back(std::move(packed_frame)); 242 } 243 244 jxl::CompressParams params; 245 params.speed_tier = spec.params.speed_tier; 246 247 if (spec.is_reconstructible_jpeg) { 248 // If this image is supposed to be a reconstructible JPEG, collect the JPEG 249 // metadata and encode it in the beginning of the compressed bytes. 
250 std::vector<uint8_t> jpeg_bytes; 251 io.jpeg_quality = 70; 252 auto encoder = jxl::extras::GetJPEGEncoder(); 253 encoder->SetOption("quality", "70"); 254 jxl::extras::EncodedImage encoded; 255 JXL_RETURN_IF_ERROR(encoder->Encode(ppf, &encoded, nullptr)); 256 jpeg_bytes = encoded.bitstreams[0]; 257 JXL_RETURN_IF_ERROR(jxl::jpeg::DecodeImageJPG( 258 jxl::Bytes(jpeg_bytes.data(), jpeg_bytes.size()), &io)); 259 std::vector<uint8_t> jpeg_data; 260 JXL_RETURN_IF_ERROR( 261 EncodeJPEGData(*io.Main().jpeg_data, &jpeg_data, params)); 262 std::vector<uint8_t> header; 263 header.insert(header.end(), jxl::kContainerHeader.begin(), 264 jxl::kContainerHeader.end()); 265 jxl::AppendBoxHeader(jxl::MakeBoxType("jbrd"), jpeg_data.size(), false, 266 &header); 267 jxl::Bytes(jpeg_data).AppendTo(header); 268 jxl::AppendBoxHeader(jxl::MakeBoxType("jxlc"), 0, true, &header); 269 compressed.append(header); 270 } 271 272 params.modular_mode = spec.params.modular_mode; 273 params.color_transform = spec.params.color_transform; 274 params.butteraugli_distance = spec.params.butteraugli_distance; 275 params.options.predictor = {spec.params.modular_predictor}; 276 params.lossy_palette = spec.params.lossy_palette; 277 if (spec.params.preview) params.preview = jxl::Override::kOn; 278 if (spec.params.noise) params.noise = jxl::Override::kOn; 279 280 // EncodeFile replaces output; pass a temporary storage for it. 281 std::vector<uint8_t> compressed_image; 282 bool ok = jxl::test::EncodeFile(params, &io, &compressed_image); 283 if (!ok) return false; 284 compressed.append(compressed_image); 285 286 // Append 4 bytes with the flags used by djxl_fuzzer to select the decoding 287 // output. 
288 std::uniform_int_distribution<> dis256(0, 255); 289 if (spec.override_decoder_spec == 0xFFFFFFFF) { 290 for (size_t i = 0; i < 4; ++i) compressed.push_back(dis256(mt)); 291 } else { 292 for (size_t i = 0; i < 4; ++i) { 293 compressed.push_back(spec.override_decoder_spec >> (8 * i)); 294 } 295 } 296 297 if (!jpegxl::tools::WriteFile(output_fn, compressed)) return false; 298 if (!quiet) { 299 std::unique_lock<std::mutex> lock(stderr_mutex); 300 std::cerr << "Stored " << output_fn << " size: " << compressed.size() 301 << std::endl; 302 } 303 304 return true; 305 } 306 307 std::vector<ImageSpec::CjxlParams> CompressParamsList() { 308 std::vector<ImageSpec::CjxlParams> ret; 309 310 { 311 ImageSpec::CjxlParams params; 312 params.butteraugli_distance = 1.5; 313 ret.push_back(params); 314 } 315 316 { 317 // Lossless 318 ImageSpec::CjxlParams params; 319 params.modular_mode = true; 320 params.color_transform = jxl::ColorTransform::kNone; 321 params.butteraugli_distance = 0.f; 322 params.modular_predictor = {jxl::Predictor::Weighted}; 323 ret.push_back(params); 324 } 325 326 return ret; 327 } 328 329 void Usage() { 330 fprintf(stderr, 331 "Use: fuzzer_corpus [-r] [-q] [-j THREADS] [output_dir]\n" 332 "\n" 333 " -r Regenerate files if already exist.\n" 334 " -q Be quiet.\n" 335 " -j THREADS Number of parallel jobs to run.\n"); 336 } 337 338 } // namespace 339 340 int main(int argc, const char** argv) { 341 const char* dest_dir = nullptr; 342 bool regenerate = false; 343 bool quiet = false; 344 size_t num_threads = std::thread::hardware_concurrency(); 345 for (int optind = 1; optind < argc;) { 346 if (!strcmp(argv[optind], "-r")) { 347 regenerate = true; 348 optind++; 349 } else if (!strcmp(argv[optind], "-q")) { 350 quiet = true; 351 optind++; 352 } else if (!strcmp(argv[optind], "-j")) { 353 optind++; 354 if (optind < argc) { 355 num_threads = atoi(argv[optind++]); 356 } else { 357 fprintf(stderr, "-j needs an argument value.\n"); 358 Usage(); 359 return 1; 360 } 361 } 
else if (dest_dir == nullptr) { 362 dest_dir = argv[optind++]; 363 } else { 364 fprintf(stderr, "Unknown parameter: \"%s\".\n", argv[optind]); 365 Usage(); 366 return 1; 367 } 368 } 369 if (!dest_dir) { 370 dest_dir = "corpus"; 371 } 372 373 struct stat st; 374 memset(&st, 0, sizeof(st)); 375 if (stat(dest_dir, &st) != 0 || !S_ISDIR(st.st_mode)) { 376 fprintf(stderr, "Output path \"%s\" is not a directory.\n", dest_dir); 377 Usage(); 378 return 1; 379 } 380 381 // Create the corpus directory if doesn't already exist. 382 std::mt19937 mt(77777); 383 384 std::vector<std::pair<uint32_t, uint32_t>> image_sizes = { 385 {8, 8}, 386 {32, 32}, 387 {128, 128}, 388 // Degenerated cases. 389 {10000, 1}, 390 {10000, 2}, 391 {1, 10000}, 392 {2, 10000}, 393 // Large case. 394 {555, 256}, 395 {257, 513}, 396 }; 397 const std::vector<ImageSpec::CjxlParams> params_list = CompressParamsList(); 398 399 ImageSpec spec; 400 // The ans_fuzzer_friendly setting is not thread safe and therefore done in 401 // an outer loop. This determines whether to use fuzzer-friendly ANS encoding. 402 for (bool fuzzer_friendly : {false, true}) { 403 jxl::SetANSFuzzerFriendly(fuzzer_friendly); 404 spec.fuzzer_friendly = TO_JXL_BOOL(fuzzer_friendly); 405 406 std::vector<ImageSpec> specs; 407 for (auto img_size : image_sizes) { 408 spec.width = img_size.first; 409 spec.height = img_size.second; 410 for (uint32_t bit_depth : {1, 2, 8, 16}) { 411 spec.bit_depth = bit_depth; 412 for (uint32_t num_channels : {1, 3}) { 413 spec.num_channels = num_channels; 414 for (uint32_t alpha_bit_depth : {0, 8, 16}) { 415 spec.alpha_bit_depth = alpha_bit_depth; 416 if (bit_depth == 16 && alpha_bit_depth == 8) { 417 // This mode is not supported in CopyTo(). 
418 continue; 419 } 420 for (uint32_t num_frames : {1, 3}) { 421 spec.num_frames = num_frames; 422 for (bool preview : {false, true}) { 423 for (JXL_BOOL reconstructible_jpeg : {JXL_FALSE, JXL_TRUE}) { 424 spec.is_reconstructible_jpeg = reconstructible_jpeg; 425 for (const auto& params : params_list) { 426 spec.params = params; 427 428 spec.params.preview = preview; 429 if (alpha_bit_depth) { 430 spec.alpha_is_premultiplied = mt() % 2; 431 } 432 if (spec.width * spec.height > 1000) { 433 // Increase the encoder speed for larger images. 434 spec.params.speed_tier = jxl::SpeedTier::kWombat; 435 } 436 spec.seed = mt() % 777777; 437 // Pick the orientation at random. It is orthogonal to all 438 // other features. Valid values are 1 to 8. 439 spec.orientation = 1 + (mt() % 8); 440 if (!spec.Validate()) { 441 if (!quiet) { 442 std::cerr << "Skipping " << spec << std::endl; 443 } 444 } else { 445 specs.push_back(spec); 446 } 447 } 448 } 449 } 450 } 451 } 452 } 453 } 454 } 455 456 specs.emplace_back(); 457 specs.back().params.lossy_palette = true; 458 specs.back().override_decoder_spec = 0; 459 460 specs.emplace_back(); 461 specs.back().params.noise = true; 462 specs.back().override_decoder_spec = 0; 463 464 jpegxl::tools::ThreadPoolInternal pool{num_threads}; 465 const auto generate = [&specs, dest_dir, regenerate, quiet]( 466 const uint32_t task, size_t /* thread */) { 467 const ImageSpec& spec = specs[task]; 468 GenerateFile(dest_dir, spec, regenerate, quiet); 469 }; 470 if (!RunOnPool(&pool, 0, specs.size(), jxl::ThreadPool::NoInit, generate, 471 "FuzzerCorpus")) { 472 std::cerr << "Error generating fuzzer corpus" << std::endl; 473 return 1; 474 } 475 } 476 std::cerr << "Finished generating fuzzer corpus" << std::endl; 477 return 0; 478 }