string_util.h (11173B)
1 // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com> 2 // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) 3 4 #pragma once 5 #include "types.h" 6 #include <charconv> 7 #include <cstddef> 8 #include <cstring> 9 #include <iomanip> 10 #include <optional> 11 #include <span> 12 #include <string> 13 #include <string_view> 14 #include <vector> 15 16 #include "fast_float/fast_float.h" 17 18 // Older versions of libstdc++ are missing support for from_chars() with floats, and was only recently 19 // merged in libc++. So, just fall back to stringstream (yuck!) on everywhere except MSVC. 20 #if !defined(_MSC_VER) 21 #include <locale> 22 #include <sstream> 23 #ifdef __APPLE__ 24 #include <Availability.h> 25 #endif 26 #endif 27 28 namespace StringUtil { 29 30 /// Checks if a wildcard matches a search string. 31 bool WildcardMatch(const char* subject, const char* mask, bool case_sensitive = true); 32 33 /// Safe version of strlcpy. 34 std::size_t Strlcpy(char* dst, const char* src, std::size_t size); 35 36 /// Strlcpy from string_view. 37 std::size_t Strlcpy(char* dst, const std::string_view src, std::size_t size); 38 39 /// Bounds checked version of strlen. 40 std::size_t Strnlen(const char* str, std::size_t max_size); 41 42 /// Platform-independent strcasecmp 43 static inline int Strcasecmp(const char* s1, const char* s2) 44 { 45 #ifdef _MSC_VER 46 return _stricmp(s1, s2); 47 #else 48 return strcasecmp(s1, s2); 49 #endif 50 } 51 52 /// Platform-independent strcasecmp 53 static inline int Strncasecmp(const char* s1, const char* s2, std::size_t n) 54 { 55 #ifdef _MSC_VER 56 return _strnicmp(s1, s2, n); 57 #else 58 return strncasecmp(s1, s2, n); 59 #endif 60 } 61 62 // Case-insensitive equality of string views. 
63 static inline bool EqualNoCase(std::string_view s1, std::string_view s2) 64 { 65 if (s1.length() != s2.length()) 66 return false; 67 68 return (Strncasecmp(s1.data(), s2.data(), s1.length()) == 0); 69 } 70 71 /// Wrapper around std::from_chars 72 template<typename T, std::enable_if_t<std::is_integral<T>::value, bool> = true> 73 inline std::optional<T> FromChars(const std::string_view str, int base = 10) 74 { 75 T value; 76 77 const std::from_chars_result result = std::from_chars(str.data(), str.data() + str.length(), value, base); 78 if (result.ec != std::errc()) 79 return std::nullopt; 80 81 return value; 82 } 83 template<typename T, std::enable_if_t<std::is_integral<T>::value, bool> = true> 84 inline std::optional<T> FromChars(const std::string_view str, int base, std::string_view* endptr) 85 { 86 T value; 87 88 const char* ptr = str.data(); 89 const char* end = ptr + str.length(); 90 const std::from_chars_result result = std::from_chars(ptr, end, value, base); 91 if (result.ec != std::errc()) 92 return std::nullopt; 93 94 if (endptr) 95 { 96 const size_t remaining_len = end - ptr - 1; 97 *endptr = (remaining_len > 0) ? 
std::string_view(result.ptr, remaining_len) : std::string_view(); 98 } 99 100 return value; 101 } 102 103 template<typename T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true> 104 inline std::optional<T> FromChars(const std::string_view str) 105 { 106 T value; 107 108 const fast_float::from_chars_result result = fast_float::from_chars(str.data(), str.data() + str.length(), value); 109 if (result.ec != std::errc()) 110 return std::nullopt; 111 112 return value; 113 } 114 template<typename T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true> 115 inline std::optional<T> FromChars(const std::string_view str, std::string_view* endptr) 116 { 117 T value; 118 119 const char* ptr = str.data(); 120 const char* end = ptr + str.length(); 121 const fast_float::from_chars_result result = fast_float::from_chars(ptr, end, value); 122 if (result.ec != std::errc()) 123 return std::nullopt; 124 125 if (endptr) 126 { 127 const size_t remaining_len = end - ptr - 1; 128 *endptr = (remaining_len > 0) ? std::string_view(result.ptr, remaining_len) : std::string_view(); 129 } 130 131 return value; 132 } 133 134 /// Wrapper around std::to_chars 135 template<typename T, std::enable_if_t<std::is_integral<T>::value, bool> = true> 136 inline std::string ToChars(T value, int base = 10) 137 { 138 // to_chars() requires macOS 10.15+. 
139 #if !defined(__APPLE__) || MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_15 140 constexpr size_t MAX_SIZE = 32; 141 char buf[MAX_SIZE]; 142 std::string ret; 143 144 const std::to_chars_result result = std::to_chars(buf, buf + MAX_SIZE, value, base); 145 if (result.ec == std::errc()) 146 ret.append(buf, result.ptr - buf); 147 148 return ret; 149 #else 150 std::ostringstream ss; 151 ss.imbue(std::locale::classic()); 152 ss << std::setbase(base) << value; 153 return ss.str(); 154 #endif 155 } 156 157 template<typename T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true> 158 inline std::string ToChars(T value) 159 { 160 // No to_chars() in older versions of libstdc++/libc++. 161 #ifdef _MSC_VER 162 constexpr size_t MAX_SIZE = 64; 163 char buf[MAX_SIZE]; 164 std::string ret; 165 const std::to_chars_result result = std::to_chars(buf, buf + MAX_SIZE, value); 166 if (result.ec == std::errc()) 167 ret.append(buf, result.ptr - buf); 168 return ret; 169 #else 170 std::ostringstream ss; 171 ss.imbue(std::locale::classic()); 172 ss << value; 173 return ss.str(); 174 #endif 175 } 176 177 /// Explicit override for booleans 178 template<> 179 inline std::optional<bool> FromChars(const std::string_view str, int base) 180 { 181 if (Strncasecmp("true", str.data(), str.length()) == 0 || Strncasecmp("yes", str.data(), str.length()) == 0 || 182 Strncasecmp("on", str.data(), str.length()) == 0 || Strncasecmp("1", str.data(), str.length()) == 0 || 183 Strncasecmp("enabled", str.data(), str.length()) == 0) 184 { 185 return true; 186 } 187 188 if (Strncasecmp("false", str.data(), str.length()) == 0 || Strncasecmp("no", str.data(), str.length()) == 0 || 189 Strncasecmp("off", str.data(), str.length()) == 0 || Strncasecmp("0", str.data(), str.length()) == 0 || 190 Strncasecmp("disabled", str.data(), str.length()) == 0) 191 { 192 return false; 193 } 194 195 return std::nullopt; 196 } 197 198 template<> 199 inline std::string ToChars(bool value, int base) 200 { 201 
return std::string(value ? "true" : "false"); 202 } 203 204 /// Encode/decode hexadecimal byte buffers 205 std::optional<std::vector<u8>> DecodeHex(const std::string_view str); 206 std::string EncodeHex(const u8* data, int length); 207 208 /// StartsWith/EndsWith variants which aren't case sensitive. 209 ALWAYS_INLINE static bool StartsWithNoCase(const std::string_view str, const std::string_view prefix) 210 { 211 return (!str.empty() && Strncasecmp(str.data(), prefix.data(), prefix.length()) == 0); 212 } 213 ALWAYS_INLINE static bool EndsWithNoCase(const std::string_view str, const std::string_view suffix) 214 { 215 const std::size_t suffix_length = suffix.length(); 216 return (str.length() >= suffix_length && 217 Strncasecmp(str.data() + (str.length() - suffix_length), suffix.data(), suffix_length) == 0); 218 } 219 220 /// Strip whitespace from the start/end of the string. 221 std::string_view StripWhitespace(const std::string_view str); 222 void StripWhitespace(std::string* str); 223 224 /// Splits a string based on a single character delimiter. 225 [[nodiscard]] std::vector<std::string_view> SplitString(const std::string_view str, char delimiter, 226 bool skip_empty = true); 227 [[nodiscard]] std::vector<std::string> SplitNewString(const std::string_view str, char delimiter, 228 bool skip_empty = true); 229 230 /// Joins a string together using the specified delimiter. 
/// Concatenates every element in [start, end), inserting a single character between
/// consecutive elements. An empty range produces an empty string.
template<typename T>
static inline std::string JoinString(const T& start, const T& end, char delimiter)
{
  std::string joined;
  bool need_delimiter = false;
  for (auto cursor = start; cursor != end; ++cursor)
  {
    // Separators only go between elements, never before the first one.
    if (need_delimiter)
      joined.push_back(delimiter);

    joined.append(*cursor);
    need_delimiter = true;
  }

  return joined;
}

/// Concatenates every element in [start, end), inserting the delimiter string between
/// consecutive elements. An empty range produces an empty string.
template<typename T>
static inline std::string JoinString(const T& start, const T& end, const std::string_view delimiter)
{
  std::string joined;
  bool need_delimiter = false;
  for (auto cursor = start; cursor != end; ++cursor)
  {
    // Separators only go between elements, never before the first one.
    if (need_delimiter)
      joined.append(delimiter);

    joined.append(*cursor);
    need_delimiter = true;
  }

  return joined;
}

/// Replaces all instances of search in subject with replacement.
[[nodiscard]] std::string ReplaceAll(const std::string_view subject, const std::string_view search,
                                     const std::string_view replacement);
void ReplaceAll(std::string* subject, const std::string_view search, const std::string_view replacement);
[[nodiscard]] std::string ReplaceAll(const std::string_view subject, const char search, const char replacement);
void ReplaceAll(std::string* subject, const char search, const char replacement);

/// Parses an assignment string (Key = Value) into its two components.
bool ParseAssignmentString(const std::string_view str, std::string_view* key, std::string_view* value);

/// Unicode replacement character.
static constexpr char32_t UNICODE_REPLACEMENT_CHARACTER = 0xFFFD;

/// Appends a UTF-16/UTF-32 codepoint to a UTF-8 string.
void EncodeAndAppendUTF8(std::string& s, char32_t ch);

/// Decodes UTF-8 to a single codepoint, updating the position parameter.
/// Returns the number of bytes the codepoint took in the original string.
274 size_t DecodeUTF8(const void* bytes, size_t length, char32_t* ch); 275 size_t DecodeUTF8(const std::string_view str, size_t offset, char32_t* ch); 276 size_t DecodeUTF8(const std::string& str, size_t offset, char32_t* ch); 277 278 // Replaces the end of a string with ellipsis if it exceeds the specified length. 279 std::string Ellipsise(const std::string_view str, u32 max_length, const char* ellipsis = "..."); 280 void EllipsiseInPlace(std::string& str, u32 max_length, const char* ellipsis = "..."); 281 282 /// Searches for the specified byte pattern in the given memory span. Wildcards (i.e. ??) are supported. 283 std::optional<size_t> BytePatternSearch(const std::span<const u8> bytes, const std::string_view pattern); 284 285 /// Strided memcpy/memcmp. 286 ALWAYS_INLINE static void StrideMemCpy(void* dst, std::size_t dst_stride, const void* src, std::size_t src_stride, 287 std::size_t copy_size, std::size_t count) 288 { 289 if (src_stride == dst_stride && src_stride == copy_size) 290 { 291 std::memcpy(dst, src, src_stride * count); 292 return; 293 } 294 295 const u8* src_ptr = static_cast<const u8*>(src); 296 u8* dst_ptr = static_cast<u8*>(dst); 297 for (std::size_t i = 0; i < count; i++) 298 { 299 std::memcpy(dst_ptr, src_ptr, copy_size); 300 src_ptr += src_stride; 301 dst_ptr += dst_stride; 302 } 303 } 304 305 ALWAYS_INLINE static int StrideMemCmp(const void* p1, std::size_t p1_stride, const void* p2, std::size_t p2_stride, 306 std::size_t copy_size, std::size_t count) 307 { 308 if (p1_stride == p2_stride && p1_stride == copy_size) 309 return std::memcmp(p1, p2, p1_stride * count); 310 311 const u8* p1_ptr = static_cast<const u8*>(p1); 312 const u8* p2_ptr = static_cast<const u8*>(p2); 313 for (std::size_t i = 0; i < count; i++) 314 { 315 int result = std::memcmp(p1_ptr, p2_ptr, copy_size); 316 if (result != 0) 317 return result; 318 p2_ptr += p2_stride; 319 p1_ptr += p1_stride; 320 } 321 322 return 0; 323 } 324 325 #ifdef _WIN32 326 327 /// Converts the 
specified UTF-8 string to a wide string. 328 std::wstring UTF8StringToWideString(const std::string_view str); 329 bool UTF8StringToWideString(std::wstring& dest, const std::string_view str); 330 331 /// Converts the specified wide string to a UTF-8 string. 332 std::string WideStringToUTF8String(const std::wstring_view str); 333 bool WideStringToUTF8String(std::string& dest, const std::wstring_view str); 334 335 #endif 336 337 } // namespace StringUtil