string_util.cpp (14895B)
1 // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com> 2 // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) 3 4 #include "string_util.h" 5 #include "assert.h" 6 7 #include <cctype> 8 #include <codecvt> 9 #include <cstdio> 10 #include <sstream> 11 12 #ifndef __APPLE__ 13 #include <malloc.h> // alloca 14 #else 15 #include <alloca.h> 16 #endif 17 18 #ifdef _WIN32 19 #include "windows_headers.h" 20 #endif 21 22 bool StringUtil::WildcardMatch(const char* subject, const char* mask, bool case_sensitive /*= true*/) 23 { 24 if (case_sensitive) 25 { 26 const char* cp = nullptr; 27 const char* mp = nullptr; 28 29 while ((*subject) && (*mask != '*')) 30 { 31 if ((*mask != '?') && (std::tolower(*mask) != std::tolower(*subject))) 32 return false; 33 34 mask++; 35 subject++; 36 } 37 38 while (*subject) 39 { 40 if (*mask == '*') 41 { 42 if (*++mask == 0) 43 return true; 44 45 mp = mask; 46 cp = subject + 1; 47 } 48 else 49 { 50 if ((*mask == '?') || (std::tolower(*mask) == std::tolower(*subject))) 51 { 52 mask++; 53 subject++; 54 } 55 else 56 { 57 mask = mp; 58 subject = cp++; 59 } 60 } 61 } 62 63 while (*mask == '*') 64 { 65 mask++; 66 } 67 68 return *mask == 0; 69 } 70 else 71 { 72 const char* cp = nullptr; 73 const char* mp = nullptr; 74 75 while ((*subject) && (*mask != '*')) 76 { 77 if ((*mask != *subject) && (*mask != '?')) 78 return false; 79 80 mask++; 81 subject++; 82 } 83 84 while (*subject) 85 { 86 if (*mask == '*') 87 { 88 if (*++mask == 0) 89 return true; 90 91 mp = mask; 92 cp = subject + 1; 93 } 94 else 95 { 96 if ((*mask == *subject) || (*mask == '?')) 97 { 98 mask++; 99 subject++; 100 } 101 else 102 { 103 mask = mp; 104 subject = cp++; 105 } 106 } 107 } 108 109 while (*mask == '*') 110 { 111 mask++; 112 } 113 114 return *mask == 0; 115 } 116 } 117 118 std::size_t StringUtil::Strlcpy(char* dst, const char* src, std::size_t size) 119 { 120 std::size_t len = std::strlen(src); 121 if (len < size) 122 { 123 std::memcpy(dst, src, len + 1); 124 } 125 else 126 { 127 std::memcpy(dst, src, size - 1); 128 dst[size - 1] = '\0'; 129 } 130 return len; 131 } 132 133 std::size_t StringUtil::Strnlen(const char* str, std::size_t max_size) 134 { 135 const char* loc = static_cast<const char*>(std::memchr(str, 0, max_size)); 136 return loc ? static_cast<size_t>(loc - str) : max_size; 137 } 138 139 std::size_t StringUtil::Strlcpy(char* dst, const std::string_view src, std::size_t size) 140 { 141 std::size_t len = src.length(); 142 if (len < size) 143 { 144 std::memcpy(dst, src.data(), len); 145 dst[len] = '\0'; 146 } 147 else 148 { 149 std::memcpy(dst, src.data(), size - 1); 150 dst[size - 1] = '\0'; 151 } 152 return len; 153 } 154 155 std::optional<std::vector<u8>> StringUtil::DecodeHex(const std::string_view in) 156 { 157 std::vector<u8> data; 158 data.reserve(in.size() / 2); 159 160 for (size_t i = 0; i < in.size() / 2; i++) 161 { 162 std::optional<u8> byte = StringUtil::FromChars<u8>(in.substr(i * 2, 2), 16); 163 if (byte.has_value()) 164 data.push_back(*byte); 165 else 166 return std::nullopt; 167 } 168 169 return {data}; 170 } 171 172 std::string StringUtil::EncodeHex(const u8* data, int length) 173 { 174 std::stringstream ss; 175 for (int i = 0; i < length; i++) 176 ss << std::hex << std::setfill('0') << std::setw(2) << static_cast<int>(data[i]); 177 178 return ss.str(); 179 } 180 181 std::string_view StringUtil::StripWhitespace(const std::string_view str) 182 { 183 std::string_view::size_type start = 0; 184 while (start < str.size() && std::isspace(str[start])) 185 start++; 186 if (start == str.size()) 187 return {}; 188 189 std::string_view::size_type end = str.size() - 1; 190 while (end > start && std::isspace(str[end])) 191 end--; 192 193 return str.substr(start, end - start + 1); 194 } 195 196 void StringUtil::StripWhitespace(std::string* str) 197 { 198 { 199 const char* cstr = str->c_str(); 200 std::string_view::size_type start = 0; 201 while (start < str->size() && std::isspace(cstr[start])) 202 start++; 203 if (start != 0) 204 str->erase(0, start); 205 } 206 207 { 208 const char* cstr = str->c_str(); 209 std::string_view::size_type start = str->size(); 210 while (start > 0 && std::isspace(cstr[start - 1])) 211 start--; 212 if (start != str->size()) 213 str->erase(start); 214 } 215 } 216 217 std::vector<std::string_view> StringUtil::SplitString(const std::string_view str, char delimiter, 218 bool skip_empty /*= true*/) 219 { 220 std::vector<std::string_view> res; 221 std::string_view::size_type last_pos = 0; 222 std::string_view::size_type pos; 223 while (last_pos < str.size() && (pos = str.find(delimiter, last_pos)) != std::string_view::npos) 224 { 225 std::string_view part(StripWhitespace(str.substr(last_pos, pos - last_pos))); 226 if (!skip_empty || !part.empty()) 227 res.push_back(std::move(part)); 228 229 last_pos = pos + 1; 230 } 231 232 if (last_pos < str.size()) 233 { 234 std::string_view part(StripWhitespace(str.substr(last_pos))); 235 if (!skip_empty || !part.empty()) 236 res.push_back(std::move(part)); 237 } 238 239 return res; 240 } 241 242 std::vector<std::string> StringUtil::SplitNewString(const std::string_view str, char delimiter, 243 bool skip_empty /*= true*/) 244 { 245 std::vector<std::string> res; 246 std::string_view::size_type last_pos = 0; 247 std::string_view::size_type pos; 248 while (last_pos < str.size() && (pos = str.find(delimiter, last_pos)) != std::string_view::npos) 249 { 250 std::string_view part(StripWhitespace(str.substr(last_pos, pos - last_pos))); 251 if (!skip_empty || !part.empty()) 252 res.emplace_back(part); 253 254 last_pos = pos + 1; 255 } 256 257 if (last_pos < str.size()) 258 { 259 std::string_view part(StripWhitespace(str.substr(last_pos))); 260 if (!skip_empty || !part.empty()) 261 res.emplace_back(part); 262 } 263 264 return res; 265 } 266 267 std::string StringUtil::ReplaceAll(const std::string_view subject, const std::string_view search, 268 const std::string_view replacement) 269 { 270 std::string ret(subject); 271 ReplaceAll(&ret, search, replacement); 272 return ret; 273 } 274 275 void StringUtil::ReplaceAll(std::string* subject, const std::string_view search, const std::string_view replacement) 276 { 277 if (!subject->empty()) 278 { 279 std::string::size_type start_pos = 0; 280 while ((start_pos = subject->find(search, start_pos)) != std::string::npos) 281 { 282 subject->replace(start_pos, search.length(), replacement); 283 start_pos += replacement.length(); 284 } 285 } 286 } 287 288 std::string StringUtil::ReplaceAll(const std::string_view subject, const char search, const char replacement) 289 { 290 std::string ret(subject); 291 ReplaceAll(&ret, search, replacement); 292 return ret; 293 } 294 295 void StringUtil::ReplaceAll(std::string* subject, const char search, const char replacement) 296 { 297 for (size_t i = 0; i < subject->length(); i++) 298 { 299 const char ch = (*subject)[i]; 300 (*subject)[i] = (ch == search) ? replacement : ch; 301 } 302 } 303 304 bool StringUtil::ParseAssignmentString(const std::string_view str, std::string_view* key, std::string_view* value) 305 { 306 const std::string_view::size_type pos = str.find('='); 307 if (pos == std::string_view::npos) 308 { 309 *key = std::string_view(); 310 *value = std::string_view(); 311 return false; 312 } 313 314 *key = StripWhitespace(str.substr(0, pos)); 315 if (pos != (str.size() - 1)) 316 *value = StripWhitespace(str.substr(pos + 1)); 317 else 318 *value = std::string_view(); 319 320 return true; 321 } 322 323 void StringUtil::EncodeAndAppendUTF8(std::string& s, char32_t ch) 324 { 325 if (ch <= 0x7F) 326 { 327 s.push_back(static_cast<char>(static_cast<u8>(ch))); 328 } 329 else if (ch <= 0x07FF) 330 { 331 s.push_back(static_cast<char>(static_cast<u8>(0xc0 | static_cast<u8>((ch >> 6) & 0x1f)))); 332 s.push_back(static_cast<char>(static_cast<u8>(0x80 | static_cast<u8>((ch & 0x3f))))); 333 } 334 else if (ch <= 0xFFFF) 335 { 336 s.push_back(static_cast<char>(static_cast<u8>(0xe0 | static_cast<u8>(((ch >> 12) & 0x0f))))); 337 s.push_back(static_cast<char>(static_cast<u8>(0x80 | static_cast<u8>(((ch >> 6) & 0x3f))))); 338 s.push_back(static_cast<char>(static_cast<u8>(0x80 | static_cast<u8>((ch & 0x3f))))); 339 } 340 else if (ch <= 0x10FFFF) 341 { 342 s.push_back(static_cast<char>(static_cast<u8>(0xf0 | static_cast<u8>(((ch >> 18) & 0x07))))); 343 s.push_back(static_cast<char>(static_cast<u8>(0x80 | static_cast<u8>(((ch >> 12) & 0x3f))))); 344 s.push_back(static_cast<char>(static_cast<u8>(0x80 | static_cast<u8>(((ch >> 6) & 0x3f))))); 345 s.push_back(static_cast<char>(static_cast<u8>(0x80 | static_cast<u8>((ch & 0x3f))))); 346 } 347 else 348 { 349 s.push_back(static_cast<char>(0xefu)); 350 s.push_back(static_cast<char>(0xbfu)); 351 s.push_back(static_cast<char>(0xbdu)); 352 } 353 } 354 355 size_t StringUtil::DecodeUTF8(const void* bytes, size_t length, char32_t* ch) 356 { 357 const u8* s = reinterpret_cast<const u8*>(bytes); 358 if (s[0] < 0x80) 359 { 360 *ch = s[0]; 361 return 1; 362 } 363 else if ((s[0] & 0xe0) == 0xc0) 364 { 365 if (length < 2) 366 goto invalid; 367 368 *ch = static_cast<char32_t>((static_cast<u32>(s[0] & 0x1f) << 6) | (static_cast<u32>(s[1] & 0x3f) << 0)); 369 return 2; 370 } 371 else if ((s[0] & 0xf0) == 0xe0) 372 { 373 if (length < 3) 374 goto invalid; 375 376 *ch = static_cast<char32_t>((static_cast<u32>(s[0] & 0x0f) << 12) | (static_cast<u32>(s[1] & 0x3f) << 6) | 377 (static_cast<u32>(s[2] & 0x3f) << 0)); 378 return 3; 379 } 380 else if ((s[0] & 0xf8) == 0xf0 && (s[0] <= 0xf4)) 381 { 382 if (length < 4) 383 goto invalid; 384 385 *ch = static_cast<char32_t>((static_cast<u32>(s[0] & 0x07) << 18) | (static_cast<u32>(s[1] & 0x3f) << 12) | 386 (static_cast<u32>(s[2] & 0x3f) << 6) | (static_cast<u32>(s[3] & 0x3f) << 0)); 387 return 4; 388 } 389 390 invalid: 391 *ch = UNICODE_REPLACEMENT_CHARACTER; // unicode replacement character 392 return 1; 393 } 394 395 std::string StringUtil::Ellipsise(const std::string_view str, u32 max_length, const char* ellipsis /*= "..."*/) 396 { 397 std::string ret; 398 ret.reserve(max_length); 399 400 const u32 str_length = static_cast<u32>(str.length()); 401 const u32 ellipsis_len = static_cast<u32>(std::strlen(ellipsis)); 402 DebugAssert(ellipsis_len > 0 && ellipsis_len <= max_length); 403 404 if (str_length > max_length) 405 { 406 const u32 copy_size = std::min(str_length, max_length - ellipsis_len); 407 if (copy_size > 0) 408 ret.append(str.data(), copy_size); 409 if (copy_size != str_length) 410 ret.append(ellipsis); 411 } 412 else 413 { 414 ret.append(str); 415 } 416 417 return ret; 418 } 419 420 void StringUtil::EllipsiseInPlace(std::string& str, u32 max_length, const char* ellipsis /*= "..."*/) 421 { 422 const u32 str_length = static_cast<u32>(str.length()); 423 const u32 ellipsis_len = static_cast<u32>(std::strlen(ellipsis)); 424 DebugAssert(ellipsis_len > 0 && ellipsis_len <= max_length); 425 426 if (str_length > max_length) 427 { 428 const u32 keep_size = std::min(static_cast<u32>(str.length()), max_length - ellipsis_len); 429 if (keep_size != str_length) 430 str.erase(keep_size); 431 432 str.append(ellipsis); 433 } 434 } 435 436 std::optional<size_t> StringUtil::BytePatternSearch(const std::span<const u8> bytes, const std::string_view pattern) 437 { 438 // Parse the pattern into a bytemask. 439 size_t pattern_length = 0; 440 bool hinibble = true; 441 for (size_t i = 0; i < pattern.size(); i++) 442 { 443 if ((pattern[i] >= '0' && pattern[i] <= '9') || (pattern[i] >= 'a' && pattern[i] <= 'f') || 444 (pattern[i] >= 'A' && pattern[i] <= 'F') || pattern[i] == '?') 445 { 446 hinibble ^= true; 447 if (hinibble) 448 pattern_length++; 449 } 450 else if (pattern[i] == ' ' || pattern[i] == '\r' || pattern[i] == '\n') 451 { 452 continue; 453 } 454 else 455 { 456 break; 457 } 458 } 459 if (pattern_length == 0) 460 return std::nullopt; 461 462 const bool allocate_on_heap = (pattern_length >= 512); 463 u8* match_bytes = allocate_on_heap ? static_cast<u8*>(alloca(pattern_length * 2)) : new u8[pattern_length * 2]; 464 u8* match_masks = match_bytes + pattern_length; 465 466 hinibble = true; 467 u8 match_byte = 0; 468 u8 match_mask = 0; 469 for (size_t i = 0, match_len = 0; i < pattern.size(); i++) 470 { 471 u8 nibble = 0, nibble_mask = 0xF; 472 if (pattern[i] >= '0' && pattern[i] <= '9') 473 nibble = pattern[i] - '0'; 474 else if (pattern[i] >= 'a' && pattern[i] <= 'f') 475 nibble = pattern[i] - 'a' + 0xa; 476 else if (pattern[i] >= 'A' && pattern[i] <= 'F') 477 nibble = pattern[i] - 'A' + 0xa; 478 else if (pattern[i] == '?') 479 nibble_mask = 0; 480 else if (pattern[i] == ' ' || pattern[i] == '\r' || pattern[i] == '\n') 481 continue; 482 else 483 break; 484 485 hinibble ^= true; 486 if (hinibble) 487 { 488 match_bytes[match_len] = nibble | (match_byte << 4); 489 match_masks[match_len] = nibble_mask | (match_mask << 4); 490 match_len++; 491 } 492 else 493 { 494 match_byte = nibble; 495 match_mask = nibble_mask; 496 } 497 } 498 if (pattern_length == 0) 499 return std::nullopt; 500 501 std::optional<size_t> ret; 502 const size_t max_search_offset = bytes.size() - pattern_length; 503 for (size_t offset = 0; offset < max_search_offset; offset++) 504 { 505 const u8* start = bytes.data() + offset; 506 for (size_t match_offset = 0;;) 507 { 508 if ((start[match_offset] & match_masks[match_offset]) != match_bytes[match_offset]) 509 break; 510 511 match_offset++; 512 if (match_offset == pattern_length) 513 { 514 // found it! 515 ret = offset; 516 } 517 } 518 } 519 520 if (allocate_on_heap) 521 delete[] match_bytes; 522 523 return ret; 524 } 525 526 size_t StringUtil::DecodeUTF8(const std::string_view str, size_t offset, char32_t* ch) 527 { 528 return DecodeUTF8(str.data() + offset, str.length() - offset, ch); 529 } 530 531 size_t StringUtil::DecodeUTF8(const std::string& str, size_t offset, char32_t* ch) 532 { 533 return DecodeUTF8(str.data() + offset, str.length() - offset, ch); 534 } 535 536 #ifdef _WIN32 537 538 std::wstring StringUtil::UTF8StringToWideString(const std::string_view str) 539 { 540 std::wstring ret; 541 if (!UTF8StringToWideString(ret, str)) 542 return {}; 543 544 return ret; 545 } 546 547 bool StringUtil::UTF8StringToWideString(std::wstring& dest, const std::string_view str) 548 { 549 int wlen = MultiByteToWideChar(CP_UTF8, 0, str.data(), static_cast<int>(str.length()), nullptr, 0); 550 if (wlen < 0) 551 return false; 552 553 dest.resize(wlen); 554 if (wlen > 0 && MultiByteToWideChar(CP_UTF8, 0, str.data(), static_cast<int>(str.length()), dest.data(), wlen) < 0) 555 return false; 556 557 return true; 558 } 559 560 std::string StringUtil::WideStringToUTF8String(const std::wstring_view str) 561 { 562 std::string ret; 563 if (!WideStringToUTF8String(ret, str)) 564 return {}; 565 566 return ret; 567 } 568 569 bool StringUtil::WideStringToUTF8String(std::string& dest, const std::wstring_view str) 570 { 571 int mblen = WideCharToMultiByte(CP_UTF8, 0, str.data(), static_cast<int>(str.length()), nullptr, 0, nullptr, nullptr); 572 if (mblen < 0) 573 return false; 574 575 dest.resize(mblen); 576 if (mblen > 0 && WideCharToMultiByte(CP_UTF8, 0, str.data(), static_cast<int>(str.length()), dest.data(), mblen, 577 nullptr, nullptr) < 0) 578 { 579 return false; 580 } 581 582 return true; 583 } 584 585 #endif