libjxl

FORK: libjxl patches used on blog
git clone https://git.neptards.moe/blog/libjxl.git

cache_aligned.cc (5379B)


// Copyright (c) the JPEG XL Project Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "lib/jxl/cache_aligned.h"

#include <jxl/types.h>
#include <stdio.h>
#include <stdlib.h>

// Disabled: slower than malloc + alignment.
#define JXL_USE_MMAP 0

#if JXL_USE_MMAP
#include <sys/mman.h>
#endif

#include <algorithm>  // std::max
#include <atomic>
#include <hwy/base.h>  // kMaxVectorSize
#include <limits>

#include "lib/jxl/base/printf_macros.h"
#include "lib/jxl/base/status.h"

namespace jxl {
namespace {

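// Bookkeeping stashed directly before the aligned payload: `allocated` is the
// raw pointer returned by malloc/mmap and `allocated_size` the total size, so
// Free() can release the block and update the statistics. The trailing
// left_padding (one maximal Highway vector) keeps accesses that reach just
// before the payload from clobbering these fields; pack(1) avoids extra
// padding between the fields themselves.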
#pragma pack(push, 1)
struct AllocationHeader {
  void* allocated;
  size_t allocated_size;
  uint8_t left_padding[hwy::kMaxVectorSize];
};
#pragma pack(pop)

std::atomic<uint64_t> num_allocations{0};
std::atomic<uint64_t> bytes_in_use{0};
std::atomic<uint64_t> max_bytes_in_use{0};

}  // namespace

// Avoids linker errors in pre-C++17 builds.
constexpr size_t CacheAligned::kPointerSize;
constexpr size_t CacheAligned::kCacheLineSize;
constexpr size_t CacheAligned::kAlignment;
constexpr size_t CacheAligned::kAlias;

void CacheAligned::PrintStats() {
  fprintf(
      stderr, "Allocations: %" PRIuS " (max bytes in use: %E)\n",
      static_cast<size_t>(num_allocations.load(std::memory_order_relaxed)),
      static_cast<double>(max_bytes_in_use.load(std::memory_order_relaxed)));
}

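// Returns a rotating multiple of kAlignment in [0, kAlias). Passing this as
// `offset` to Allocate() staggers the start addresses of successive buffers,
// so that same-sized allocations do not all land on the same cache sets.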
size_t CacheAligned::NextOffset() {
  static std::atomic<uint32_t> next{0};
  constexpr uint32_t kGroups = CacheAligned::kAlias / CacheAligned::kAlignment;
  const uint32_t group = next.fetch_add(1, std::memory_order_relaxed) % kGroups;
  return CacheAligned::kAlignment * group;
}

void* CacheAligned::Allocate(const size_t payload_size, size_t offset) {
  JXL_ASSERT(payload_size <= std::numeric_limits<size_t>::max() / 2);
  JXL_ASSERT((offset % kAlignment == 0) && offset <= kAlias);

  // What: | misalign | unused | AllocationHeader |payload
  // Size: |<= kAlias | offset |                  |payload_size
  //       ^allocated.^aligned.^header............^payload
  // The header must immediately precede payload, which must remain aligned.
  // To avoid wasting space, the header resides at the end of `unused`,
  // which therefore cannot be empty (offset == 0).
  if (offset == 0) {
    // SVE/RVV vectors can be large, so we cannot rely on them (including the
    // padding at the end of AllocationHeader) to fit in kAlignment.
    offset = hwy::RoundUpTo(sizeof(AllocationHeader), kAlignment);
  }

#if JXL_USE_MMAP
  const size_t allocated_size = offset + payload_size;
  const int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE;
  void* allocated =
      mmap(nullptr, allocated_size, PROT_READ | PROT_WRITE, flags, -1, 0);
  if (allocated == MAP_FAILED) return nullptr;
  const uintptr_t aligned = reinterpret_cast<uintptr_t>(allocated);
#else
  const size_t allocated_size = kAlias + offset + payload_size;
  void* allocated = malloc(allocated_size);
  if (allocated == nullptr) return nullptr;
  // Always round up even if already aligned - we already asked for kAlias
  // extra bytes and there's no way to give them back.
  uintptr_t aligned = reinterpret_cast<uintptr_t>(allocated) + kAlias;
  static_assert((kAlias & (kAlias - 1)) == 0, "kAlias must be a power of 2");
  static_assert(kAlias >= kAlignment, "Cannot align to more than kAlias");
  aligned &= ~(kAlias - 1);
#endif

#if JXL_FALSE
  // No effect.
  uintptr_t page_aligned = reinterpret_cast<uintptr_t>(allocated);
  page_aligned &= ~(4096 - 1);
  if (madvise(reinterpret_cast<void*>(page_aligned), allocated_size,
              MADV_WILLNEED) != 0) {
    JXL_NOTIFY_ERROR("madvise failed");
  }
#elif 0
  // INCREASES both first and subsequent decode times.
  if (mlock(allocated, allocated_size) != 0) {
    JXL_NOTIFY_ERROR("mlock failed");
  }
#endif

  // Update statistics (#allocations and max bytes in use)
  num_allocations.fetch_add(1, std::memory_order_relaxed);
  const uint64_t prev_bytes =
      bytes_in_use.fetch_add(allocated_size, std::memory_order_acq_rel);
  uint64_t expected_max = max_bytes_in_use.load(std::memory_order_acquire);
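  // Lock-free running maximum: retry the compare-exchange so concurrent
  // allocations cannot lose updates; `expected_max` is refreshed by each
  // failed attempt.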
  for (;;) {
    const uint64_t desired =
        std::max(expected_max, prev_bytes + allocated_size);
    if (max_bytes_in_use.compare_exchange_strong(expected_max, desired,
                                                 std::memory_order_acq_rel)) {
      break;
    }
  }

  const uintptr_t payload = aligned + offset;  // still aligned

  // Stash `allocated` and payload_size inside header for use by Free().
  AllocationHeader* header = reinterpret_cast<AllocationHeader*>(payload) - 1;
  header->allocated = allocated;
  header->allocated_size = allocated_size;

  return JXL_ASSUME_ALIGNED(reinterpret_cast<void*>(payload), 64);
}

void CacheAligned::Free(const void* aligned_pointer) {
  if (aligned_pointer == nullptr) {
    return;
  }
  const uintptr_t payload = reinterpret_cast<uintptr_t>(aligned_pointer);
  JXL_ASSERT(payload % kAlignment == 0);
  const AllocationHeader* header =
      reinterpret_cast<const AllocationHeader*>(payload) - 1;

  // Subtract (2's complement negation).
  bytes_in_use.fetch_add(~header->allocated_size + 1,
                         std::memory_order_acq_rel);

#if JXL_USE_MMAP
  munmap(header->allocated, header->allocated_size);
#else
  free(header->allocated);
#endif
}

}  // namespace jxl
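
For context, a minimal sketch of how NextOffset, Allocate, and Free from this file fit together in a caller. It is illustrative only and not part of cache_aligned.cc; the function ExampleUse is hypothetical.

// Illustrative sketch only; not part of cache_aligned.cc.
#include <cstddef>

#include "lib/jxl/cache_aligned.h"

void ExampleUse() {
  const size_t bytes = 1 << 20;  // 1 MiB payload
  // NextOffset() staggers start addresses so same-sized buffers do not alias.
  void* buf =
      jxl::CacheAligned::Allocate(bytes, jxl::CacheAligned::NextOffset());
  if (buf == nullptr) return;  // allocation failed
  // ... use buf; the returned pointer is at least 64-byte aligned ...
  jxl::CacheAligned::Free(buf);
}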