cache_aligned.cc (5379B)
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "lib/jxl/cache_aligned.h"

#include <jxl/types.h>
#include <stdio.h>
#include <stdlib.h>

// Disabled: slower than malloc + alignment.
#define JXL_USE_MMAP 0

#if JXL_USE_MMAP
#include <sys/mman.h>
#endif

#include <algorithm>  // std::max
#include <atomic>
#include <hwy/base.h>  // kMaxVectorSize
#include <limits>

#include "lib/jxl/base/printf_macros.h"
#include "lib/jxl/base/status.h"

namespace jxl {
namespace {

#pragma pack(push, 1)
struct AllocationHeader {
  void* allocated;
  size_t allocated_size;
  uint8_t left_padding[hwy::kMaxVectorSize];
};
#pragma pack(pop)

std::atomic<uint64_t> num_allocations{0};
std::atomic<uint64_t> bytes_in_use{0};
std::atomic<uint64_t> max_bytes_in_use{0};

}  // namespace

// Avoids linker errors in pre-C++17 builds.
constexpr size_t CacheAligned::kPointerSize;
constexpr size_t CacheAligned::kCacheLineSize;
constexpr size_t CacheAligned::kAlignment;
constexpr size_t CacheAligned::kAlias;

void CacheAligned::PrintStats() {
  fprintf(
      stderr, "Allocations: %" PRIuS " (max bytes in use: %E)\n",
      static_cast<size_t>(num_allocations.load(std::memory_order_relaxed)),
      static_cast<double>(max_bytes_in_use.load(std::memory_order_relaxed)));
}

size_t CacheAligned::NextOffset() {
  static std::atomic<uint32_t> next{0};
  constexpr uint32_t kGroups = CacheAligned::kAlias / CacheAligned::kAlignment;
  const uint32_t group =
      next.fetch_add(1, std::memory_order_relaxed) % kGroups;
  return CacheAligned::kAlignment * group;
}

void* CacheAligned::Allocate(const size_t payload_size, size_t offset) {
  JXL_ASSERT(payload_size <= std::numeric_limits<size_t>::max() / 2);
  JXL_ASSERT((offset % kAlignment == 0) && offset <= kAlias);

  // What: | misalign | unused | AllocationHeader |payload
  // Size: |<= kAlias | offset |                  |payload_size
  //       ^allocated.^aligned.^header............^payload
  // The header must immediately precede payload, which must remain aligned.
  // To avoid wasting space, the header resides at the end of `unused`,
  // which therefore cannot be empty (offset == 0).
  if (offset == 0) {
    // SVE/RVV vectors can be large, so we cannot rely on them (including the
    // padding at the end of AllocationHeader) to fit in kAlignment.
    offset = hwy::RoundUpTo(sizeof(AllocationHeader), kAlignment);
  }

#if JXL_USE_MMAP
  const size_t allocated_size = offset + payload_size;
  const int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE;
  void* allocated =
      mmap(nullptr, allocated_size, PROT_READ | PROT_WRITE, flags, -1, 0);
  if (allocated == MAP_FAILED) return nullptr;
  const uintptr_t aligned = reinterpret_cast<uintptr_t>(allocated);
#else
  const size_t allocated_size = kAlias + offset + payload_size;
  void* allocated = malloc(allocated_size);
  if (allocated == nullptr) return nullptr;
  // Always round up even if already aligned - we already asked for kAlias
  // extra bytes and there's no way to give them back.
  uintptr_t aligned = reinterpret_cast<uintptr_t>(allocated) + kAlias;
  static_assert((kAlias & (kAlias - 1)) == 0, "kAlias must be a power of 2");
  static_assert(kAlias >= kAlignment, "Cannot align to more than kAlias");
  aligned &= ~(kAlias - 1);
#endif

#if JXL_FALSE
  // No effect.
  uintptr_t page_aligned = reinterpret_cast<uintptr_t>(allocated);
  page_aligned &= ~(4096 - 1);
  if (madvise(reinterpret_cast<void*>(page_aligned), allocated_size,
              MADV_WILLNEED) != 0) {
    JXL_NOTIFY_ERROR("madvise failed");
  }
#elif 0
  // INCREASES both first and subsequent decode times.
  if (mlock(allocated, allocated_size) != 0) {
    JXL_NOTIFY_ERROR("mlock failed");
  }
#endif

  // Update statistics (#allocations and max bytes in use)
  num_allocations.fetch_add(1, std::memory_order_relaxed);
  const uint64_t prev_bytes =
      bytes_in_use.fetch_add(allocated_size, std::memory_order_acq_rel);
  uint64_t expected_max = max_bytes_in_use.load(std::memory_order_acquire);
  for (;;) {
    const uint64_t desired =
        std::max(expected_max, prev_bytes + allocated_size);
    if (max_bytes_in_use.compare_exchange_strong(expected_max, desired,
                                                 std::memory_order_acq_rel)) {
      break;
    }
  }

  const uintptr_t payload = aligned + offset;  // still aligned

  // Stash `allocated` and payload_size inside header for use by Free().
  AllocationHeader* header = reinterpret_cast<AllocationHeader*>(payload) - 1;
  header->allocated = allocated;
  header->allocated_size = allocated_size;

  return JXL_ASSUME_ALIGNED(reinterpret_cast<void*>(payload), 64);
}

void CacheAligned::Free(const void* aligned_pointer) {
  if (aligned_pointer == nullptr) {
    return;
  }
  const uintptr_t payload = reinterpret_cast<uintptr_t>(aligned_pointer);
  JXL_ASSERT(payload % kAlignment == 0);
  const AllocationHeader* header =
      reinterpret_cast<const AllocationHeader*>(payload) - 1;

  // Subtract (2's complement negation).
  bytes_in_use.fetch_add(~header->allocated_size + 1,
                         std::memory_order_acq_rel);

#if JXL_USE_MMAP
  munmap(header->allocated, header->allocated_size);
#else
  free(header->allocated);
#endif
}

}  // namespace jxl
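
Usage sketch (not part of the file above): callers pair Allocate with Free and can pass NextOffset() to stagger consecutive buffers across cache-associativity groups, which is the purpose of the kAlias/offset machinery. This assumes only the interface defined in this translation unit (Allocate, Free, NextOffset, PrintStats); the hypothetical Example() wrapper is for illustration.

#include <string.h>

#include "lib/jxl/cache_aligned.h"

void Example() {
  // Stagger this buffer relative to previous ones so that same-index rows of
  // several buffers do not all map to the same cache sets.
  const size_t offset = jxl::CacheAligned::NextOffset();

  // Payload is aligned to CacheAligned::kAlignment; may return nullptr.
  void* mem = jxl::CacheAligned::Allocate(4096, offset);
  if (mem == nullptr) return;
  memset(mem, 0, 4096);

  // Free() reads the AllocationHeader stored just before the payload.
  jxl::CacheAligned::Free(mem);

  // Optional: dump allocation count and peak bytes in use to stderr.
  jxl::CacheAligned::PrintStats();
}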