duckstation

An archive of duckstation taken from the revision just before upstream changed it to a proprietary software project; this version is the libre one.
git clone https://git.neptards.moe/u3shit/duckstation.git
Log | Files | Refs | README | LICENSE

string_util.cpp (14895B)


      1 // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
      2 // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
      3 
      4 #include "string_util.h"
      5 #include "assert.h"
      6 
      7 #include <cctype>
      8 #include <codecvt>
      9 #include <cstdio>
     10 #include <sstream>
     11 
     12 #ifndef __APPLE__
     13 #include <malloc.h> // alloca
     14 #else
     15 #include <alloca.h>
     16 #endif
     17 
     18 #ifdef _WIN32
     19 #include "windows_headers.h"
     20 #endif
     21 
     22 bool StringUtil::WildcardMatch(const char* subject, const char* mask, bool case_sensitive /*= true*/)
     23 {
     24   if (case_sensitive)
     25   {
     26     const char* cp = nullptr;
     27     const char* mp = nullptr;
     28 
     29     while ((*subject) && (*mask != '*'))
     30     {
     31       if ((*mask != '?') && (std::tolower(*mask) != std::tolower(*subject)))
     32         return false;
     33 
     34       mask++;
     35       subject++;
     36     }
     37 
     38     while (*subject)
     39     {
     40       if (*mask == '*')
     41       {
     42         if (*++mask == 0)
     43           return true;
     44 
     45         mp = mask;
     46         cp = subject + 1;
     47       }
     48       else
     49       {
     50         if ((*mask == '?') || (std::tolower(*mask) == std::tolower(*subject)))
     51         {
     52           mask++;
     53           subject++;
     54         }
     55         else
     56         {
     57           mask = mp;
     58           subject = cp++;
     59         }
     60       }
     61     }
     62 
     63     while (*mask == '*')
     64     {
     65       mask++;
     66     }
     67 
     68     return *mask == 0;
     69   }
     70   else
     71   {
     72     const char* cp = nullptr;
     73     const char* mp = nullptr;
     74 
     75     while ((*subject) && (*mask != '*'))
     76     {
     77       if ((*mask != *subject) && (*mask != '?'))
     78         return false;
     79 
     80       mask++;
     81       subject++;
     82     }
     83 
     84     while (*subject)
     85     {
     86       if (*mask == '*')
     87       {
     88         if (*++mask == 0)
     89           return true;
     90 
     91         mp = mask;
     92         cp = subject + 1;
     93       }
     94       else
     95       {
     96         if ((*mask == *subject) || (*mask == '?'))
     97         {
     98           mask++;
     99           subject++;
    100         }
    101         else
    102         {
    103           mask = mp;
    104           subject = cp++;
    105         }
    106       }
    107     }
    108 
    109     while (*mask == '*')
    110     {
    111       mask++;
    112     }
    113 
    114     return *mask == 0;
    115   }
    116 }
    117 
    118 std::size_t StringUtil::Strlcpy(char* dst, const char* src, std::size_t size)
    119 {
    120   std::size_t len = std::strlen(src);
    121   if (len < size)
    122   {
    123     std::memcpy(dst, src, len + 1);
    124   }
    125   else
    126   {
    127     std::memcpy(dst, src, size - 1);
    128     dst[size - 1] = '\0';
    129   }
    130   return len;
    131 }
    132 
    133 std::size_t StringUtil::Strnlen(const char* str, std::size_t max_size)
    134 {
    135   const char* loc = static_cast<const char*>(std::memchr(str, 0, max_size));
    136   return loc ? static_cast<size_t>(loc - str) : max_size;
    137 }
    138 
    139 std::size_t StringUtil::Strlcpy(char* dst, const std::string_view src, std::size_t size)
    140 {
    141   std::size_t len = src.length();
    142   if (len < size)
    143   {
    144     std::memcpy(dst, src.data(), len);
    145     dst[len] = '\0';
    146   }
    147   else
    148   {
    149     std::memcpy(dst, src.data(), size - 1);
    150     dst[size - 1] = '\0';
    151   }
    152   return len;
    153 }
    154 
    155 std::optional<std::vector<u8>> StringUtil::DecodeHex(const std::string_view in)
    156 {
    157   std::vector<u8> data;
    158   data.reserve(in.size() / 2);
    159 
    160   for (size_t i = 0; i < in.size() / 2; i++)
    161   {
    162     std::optional<u8> byte = StringUtil::FromChars<u8>(in.substr(i * 2, 2), 16);
    163     if (byte.has_value())
    164       data.push_back(*byte);
    165     else
    166       return std::nullopt;
    167   }
    168 
    169   return {data};
    170 }
    171 
    172 std::string StringUtil::EncodeHex(const u8* data, int length)
    173 {
    174   std::stringstream ss;
    175   for (int i = 0; i < length; i++)
    176     ss << std::hex << std::setfill('0') << std::setw(2) << static_cast<int>(data[i]);
    177 
    178   return ss.str();
    179 }
    180 
    181 std::string_view StringUtil::StripWhitespace(const std::string_view str)
    182 {
    183   std::string_view::size_type start = 0;
    184   while (start < str.size() && std::isspace(str[start]))
    185     start++;
    186   if (start == str.size())
    187     return {};
    188 
    189   std::string_view::size_type end = str.size() - 1;
    190   while (end > start && std::isspace(str[end]))
    191     end--;
    192 
    193   return str.substr(start, end - start + 1);
    194 }
    195 
    196 void StringUtil::StripWhitespace(std::string* str)
    197 {
    198   {
    199     const char* cstr = str->c_str();
    200     std::string_view::size_type start = 0;
    201     while (start < str->size() && std::isspace(cstr[start]))
    202       start++;
    203     if (start != 0)
    204       str->erase(0, start);
    205   }
    206 
    207   {
    208     const char* cstr = str->c_str();
    209     std::string_view::size_type start = str->size();
    210     while (start > 0 && std::isspace(cstr[start - 1]))
    211       start--;
    212     if (start != str->size())
    213       str->erase(start);
    214   }
    215 }
    216 
    217 std::vector<std::string_view> StringUtil::SplitString(const std::string_view str, char delimiter,
    218                                                       bool skip_empty /*= true*/)
    219 {
    220   std::vector<std::string_view> res;
    221   std::string_view::size_type last_pos = 0;
    222   std::string_view::size_type pos;
    223   while (last_pos < str.size() && (pos = str.find(delimiter, last_pos)) != std::string_view::npos)
    224   {
    225     std::string_view part(StripWhitespace(str.substr(last_pos, pos - last_pos)));
    226     if (!skip_empty || !part.empty())
    227       res.push_back(std::move(part));
    228 
    229     last_pos = pos + 1;
    230   }
    231 
    232   if (last_pos < str.size())
    233   {
    234     std::string_view part(StripWhitespace(str.substr(last_pos)));
    235     if (!skip_empty || !part.empty())
    236       res.push_back(std::move(part));
    237   }
    238 
    239   return res;
    240 }
    241 
    242 std::vector<std::string> StringUtil::SplitNewString(const std::string_view str, char delimiter,
    243                                                     bool skip_empty /*= true*/)
    244 {
    245   std::vector<std::string> res;
    246   std::string_view::size_type last_pos = 0;
    247   std::string_view::size_type pos;
    248   while (last_pos < str.size() && (pos = str.find(delimiter, last_pos)) != std::string_view::npos)
    249   {
    250     std::string_view part(StripWhitespace(str.substr(last_pos, pos - last_pos)));
    251     if (!skip_empty || !part.empty())
    252       res.emplace_back(part);
    253 
    254     last_pos = pos + 1;
    255   }
    256 
    257   if (last_pos < str.size())
    258   {
    259     std::string_view part(StripWhitespace(str.substr(last_pos)));
    260     if (!skip_empty || !part.empty())
    261       res.emplace_back(part);
    262   }
    263 
    264   return res;
    265 }
    266 
    267 std::string StringUtil::ReplaceAll(const std::string_view subject, const std::string_view search,
    268                                    const std::string_view replacement)
    269 {
    270   std::string ret(subject);
    271   ReplaceAll(&ret, search, replacement);
    272   return ret;
    273 }
    274 
    275 void StringUtil::ReplaceAll(std::string* subject, const std::string_view search, const std::string_view replacement)
    276 {
    277   if (!subject->empty())
    278   {
    279     std::string::size_type start_pos = 0;
    280     while ((start_pos = subject->find(search, start_pos)) != std::string::npos)
    281     {
    282       subject->replace(start_pos, search.length(), replacement);
    283       start_pos += replacement.length();
    284     }
    285   }
    286 }
    287 
    288 std::string StringUtil::ReplaceAll(const std::string_view subject, const char search, const char replacement)
    289 {
    290   std::string ret(subject);
    291   ReplaceAll(&ret, search, replacement);
    292   return ret;
    293 }
    294 
    295 void StringUtil::ReplaceAll(std::string* subject, const char search, const char replacement)
    296 {
    297   for (size_t i = 0; i < subject->length(); i++)
    298   {
    299     const char ch = (*subject)[i];
    300     (*subject)[i] = (ch == search) ? replacement : ch;
    301   }
    302 }
    303 
    304 bool StringUtil::ParseAssignmentString(const std::string_view str, std::string_view* key, std::string_view* value)
    305 {
    306   const std::string_view::size_type pos = str.find('=');
    307   if (pos == std::string_view::npos)
    308   {
    309     *key = std::string_view();
    310     *value = std::string_view();
    311     return false;
    312   }
    313 
    314   *key = StripWhitespace(str.substr(0, pos));
    315   if (pos != (str.size() - 1))
    316     *value = StripWhitespace(str.substr(pos + 1));
    317   else
    318     *value = std::string_view();
    319 
    320   return true;
    321 }
    322 
    323 void StringUtil::EncodeAndAppendUTF8(std::string& s, char32_t ch)
    324 {
    325   if (ch <= 0x7F)
    326   {
    327     s.push_back(static_cast<char>(static_cast<u8>(ch)));
    328   }
    329   else if (ch <= 0x07FF)
    330   {
    331     s.push_back(static_cast<char>(static_cast<u8>(0xc0 | static_cast<u8>((ch >> 6) & 0x1f))));
    332     s.push_back(static_cast<char>(static_cast<u8>(0x80 | static_cast<u8>((ch & 0x3f)))));
    333   }
    334   else if (ch <= 0xFFFF)
    335   {
    336     s.push_back(static_cast<char>(static_cast<u8>(0xe0 | static_cast<u8>(((ch >> 12) & 0x0f)))));
    337     s.push_back(static_cast<char>(static_cast<u8>(0x80 | static_cast<u8>(((ch >> 6) & 0x3f)))));
    338     s.push_back(static_cast<char>(static_cast<u8>(0x80 | static_cast<u8>((ch & 0x3f)))));
    339   }
    340   else if (ch <= 0x10FFFF)
    341   {
    342     s.push_back(static_cast<char>(static_cast<u8>(0xf0 | static_cast<u8>(((ch >> 18) & 0x07)))));
    343     s.push_back(static_cast<char>(static_cast<u8>(0x80 | static_cast<u8>(((ch >> 12) & 0x3f)))));
    344     s.push_back(static_cast<char>(static_cast<u8>(0x80 | static_cast<u8>(((ch >> 6) & 0x3f)))));
    345     s.push_back(static_cast<char>(static_cast<u8>(0x80 | static_cast<u8>((ch & 0x3f)))));
    346   }
    347   else
    348   {
    349     s.push_back(static_cast<char>(0xefu));
    350     s.push_back(static_cast<char>(0xbfu));
    351     s.push_back(static_cast<char>(0xbdu));
    352   }
    353 }
    354 
    355 size_t StringUtil::DecodeUTF8(const void* bytes, size_t length, char32_t* ch)
    356 {
    357   const u8* s = reinterpret_cast<const u8*>(bytes);
    358   if (s[0] < 0x80)
    359   {
    360     *ch = s[0];
    361     return 1;
    362   }
    363   else if ((s[0] & 0xe0) == 0xc0)
    364   {
    365     if (length < 2)
    366       goto invalid;
    367 
    368     *ch = static_cast<char32_t>((static_cast<u32>(s[0] & 0x1f) << 6) | (static_cast<u32>(s[1] & 0x3f) << 0));
    369     return 2;
    370   }
    371   else if ((s[0] & 0xf0) == 0xe0)
    372   {
    373     if (length < 3)
    374       goto invalid;
    375 
    376     *ch = static_cast<char32_t>((static_cast<u32>(s[0] & 0x0f) << 12) | (static_cast<u32>(s[1] & 0x3f) << 6) |
    377                                 (static_cast<u32>(s[2] & 0x3f) << 0));
    378     return 3;
    379   }
    380   else if ((s[0] & 0xf8) == 0xf0 && (s[0] <= 0xf4))
    381   {
    382     if (length < 4)
    383       goto invalid;
    384 
    385     *ch = static_cast<char32_t>((static_cast<u32>(s[0] & 0x07) << 18) | (static_cast<u32>(s[1] & 0x3f) << 12) |
    386                                 (static_cast<u32>(s[2] & 0x3f) << 6) | (static_cast<u32>(s[3] & 0x3f) << 0));
    387     return 4;
    388   }
    389 
    390 invalid:
    391   *ch = UNICODE_REPLACEMENT_CHARACTER; // unicode replacement character
    392   return 1;
    393 }
    394 
    395 std::string StringUtil::Ellipsise(const std::string_view str, u32 max_length, const char* ellipsis /*= "..."*/)
    396 {
    397   std::string ret;
    398   ret.reserve(max_length);
    399 
    400   const u32 str_length = static_cast<u32>(str.length());
    401   const u32 ellipsis_len = static_cast<u32>(std::strlen(ellipsis));
    402   DebugAssert(ellipsis_len > 0 && ellipsis_len <= max_length);
    403 
    404   if (str_length > max_length)
    405   {
    406     const u32 copy_size = std::min(str_length, max_length - ellipsis_len);
    407     if (copy_size > 0)
    408       ret.append(str.data(), copy_size);
    409     if (copy_size != str_length)
    410       ret.append(ellipsis);
    411   }
    412   else
    413   {
    414     ret.append(str);
    415   }
    416 
    417   return ret;
    418 }
    419 
    420 void StringUtil::EllipsiseInPlace(std::string& str, u32 max_length, const char* ellipsis /*= "..."*/)
    421 {
    422   const u32 str_length = static_cast<u32>(str.length());
    423   const u32 ellipsis_len = static_cast<u32>(std::strlen(ellipsis));
    424   DebugAssert(ellipsis_len > 0 && ellipsis_len <= max_length);
    425 
    426   if (str_length > max_length)
    427   {
    428     const u32 keep_size = std::min(static_cast<u32>(str.length()), max_length - ellipsis_len);
    429     if (keep_size != str_length)
    430       str.erase(keep_size);
    431 
    432     str.append(ellipsis);
    433   }
    434 }
    435 
    436 std::optional<size_t> StringUtil::BytePatternSearch(const std::span<const u8> bytes, const std::string_view pattern)
    437 {
    438   // Parse the pattern into a bytemask.
    439   size_t pattern_length = 0;
    440   bool hinibble = true;
    441   for (size_t i = 0; i < pattern.size(); i++)
    442   {
    443     if ((pattern[i] >= '0' && pattern[i] <= '9') || (pattern[i] >= 'a' && pattern[i] <= 'f') ||
    444         (pattern[i] >= 'A' && pattern[i] <= 'F') || pattern[i] == '?')
    445     {
    446       hinibble ^= true;
    447       if (hinibble)
    448         pattern_length++;
    449     }
    450     else if (pattern[i] == ' ' || pattern[i] == '\r' || pattern[i] == '\n')
    451     {
    452       continue;
    453     }
    454     else
    455     {
    456       break;
    457     }
    458   }
    459   if (pattern_length == 0)
    460     return std::nullopt;
    461 
    462   const bool allocate_on_heap = (pattern_length >= 512);
    463   u8* match_bytes = allocate_on_heap ? static_cast<u8*>(alloca(pattern_length * 2)) : new u8[pattern_length * 2];
    464   u8* match_masks = match_bytes + pattern_length;
    465 
    466   hinibble = true;
    467   u8 match_byte = 0;
    468   u8 match_mask = 0;
    469   for (size_t i = 0, match_len = 0; i < pattern.size(); i++)
    470   {
    471     u8 nibble = 0, nibble_mask = 0xF;
    472     if (pattern[i] >= '0' && pattern[i] <= '9')
    473       nibble = pattern[i] - '0';
    474     else if (pattern[i] >= 'a' && pattern[i] <= 'f')
    475       nibble = pattern[i] - 'a' + 0xa;
    476     else if (pattern[i] >= 'A' && pattern[i] <= 'F')
    477       nibble = pattern[i] - 'A' + 0xa;
    478     else if (pattern[i] == '?')
    479       nibble_mask = 0;
    480     else if (pattern[i] == ' ' || pattern[i] == '\r' || pattern[i] == '\n')
    481       continue;
    482     else
    483       break;
    484 
    485     hinibble ^= true;
    486     if (hinibble)
    487     {
    488       match_bytes[match_len] = nibble | (match_byte << 4);
    489       match_masks[match_len] = nibble_mask | (match_mask << 4);
    490       match_len++;
    491     }
    492     else
    493     {
    494       match_byte = nibble;
    495       match_mask = nibble_mask;
    496     }
    497   }
    498   if (pattern_length == 0)
    499     return std::nullopt;
    500 
    501   std::optional<size_t> ret;
    502   const size_t max_search_offset = bytes.size() - pattern_length;
    503   for (size_t offset = 0; offset < max_search_offset; offset++)
    504   {
    505     const u8* start = bytes.data() + offset;
    506     for (size_t match_offset = 0;;)
    507     {
    508       if ((start[match_offset] & match_masks[match_offset]) != match_bytes[match_offset])
    509         break;
    510 
    511       match_offset++;
    512       if (match_offset == pattern_length)
    513       {
    514         // found it!
    515         ret = offset;
    516       }
    517     }
    518   }
    519 
    520   if (allocate_on_heap)
    521     delete[] match_bytes;
    522 
    523   return ret;
    524 }
    525 
    526 size_t StringUtil::DecodeUTF8(const std::string_view str, size_t offset, char32_t* ch)
    527 {
    528   return DecodeUTF8(str.data() + offset, str.length() - offset, ch);
    529 }
    530 
    531 size_t StringUtil::DecodeUTF8(const std::string& str, size_t offset, char32_t* ch)
    532 {
    533   return DecodeUTF8(str.data() + offset, str.length() - offset, ch);
    534 }
    535 
    536 #ifdef _WIN32
    537 
    538 std::wstring StringUtil::UTF8StringToWideString(const std::string_view str)
    539 {
    540   std::wstring ret;
    541   if (!UTF8StringToWideString(ret, str))
    542     return {};
    543 
    544   return ret;
    545 }
    546 
    547 bool StringUtil::UTF8StringToWideString(std::wstring& dest, const std::string_view str)
    548 {
    549   int wlen = MultiByteToWideChar(CP_UTF8, 0, str.data(), static_cast<int>(str.length()), nullptr, 0);
    550   if (wlen < 0)
    551     return false;
    552 
    553   dest.resize(wlen);
    554   if (wlen > 0 && MultiByteToWideChar(CP_UTF8, 0, str.data(), static_cast<int>(str.length()), dest.data(), wlen) < 0)
    555     return false;
    556 
    557   return true;
    558 }
    559 
    560 std::string StringUtil::WideStringToUTF8String(const std::wstring_view str)
    561 {
    562   std::string ret;
    563   if (!WideStringToUTF8String(ret, str))
    564     return {};
    565 
    566   return ret;
    567 }
    568 
    569 bool StringUtil::WideStringToUTF8String(std::string& dest, const std::wstring_view str)
    570 {
    571   int mblen = WideCharToMultiByte(CP_UTF8, 0, str.data(), static_cast<int>(str.length()), nullptr, 0, nullptr, nullptr);
    572   if (mblen < 0)
    573     return false;
    574 
    575   dest.resize(mblen);
    576   if (mblen > 0 && WideCharToMultiByte(CP_UTF8, 0, str.data(), static_cast<int>(str.length()), dest.data(), mblen,
    577                                        nullptr, nullptr) < 0)
    578   {
    579     return false;
    580   }
    581 
    582   return true;
    583 }
    584 
    585 #endif