ascii_number.h - duckstation - duckstation, but archived from the revision just before upstream changed it to a proprietary software project, this version is the libre one

ascii_number.h (7763B)
      1 #ifndef FASTFLOAT_ASCII_NUMBER_H
      2 #define FASTFLOAT_ASCII_NUMBER_H
      3 
      4 #include <cctype>
      5 #include <cstdint>
      6 #include <cstring>
      7 #include <iterator>
      8 
      9 #include "float_common.h"
     10 
     11 namespace fast_float {
     12 
     13 // Next function can be micro-optimized, but compilers are entirely
     14 // able to optimize it well.
     15 fastfloat_really_inline bool is_integer(char c)  noexcept  { return c >= '0' && c <= '9'; }
     16 
     17 fastfloat_really_inline uint64_t byteswap(uint64_t val) {
     18   return (val & 0xFF00000000000000) >> 56
     19     | (val & 0x00FF000000000000) >> 40
     20     | (val & 0x0000FF0000000000) >> 24
     21     | (val & 0x000000FF00000000) >> 8
     22     | (val & 0x00000000FF000000) << 8
     23     | (val & 0x0000000000FF0000) << 24
     24     | (val & 0x000000000000FF00) << 40
     25     | (val & 0x00000000000000FF) << 56;
     26 }
     27 
     28 fastfloat_really_inline uint64_t read_u64(const char *chars) {
     29   uint64_t val;
     30   ::memcpy(&val, chars, sizeof(uint64_t));
     31 #if FASTFLOAT_IS_BIG_ENDIAN == 1
     32   // Need to read as-if the number was in little-endian order.
     33   val = byteswap(val);
     34 #endif
     35   return val;
     36 }
     37 
     38 fastfloat_really_inline void write_u64(uint8_t *chars, uint64_t val) {
     39 #if FASTFLOAT_IS_BIG_ENDIAN == 1
     40   // Need to read as-if the number was in little-endian order.
     41   val = byteswap(val);
     42 #endif
     43   ::memcpy(chars, &val, sizeof(uint64_t));
     44 }
     45 
     46 // credit  @aqrit
     47 fastfloat_really_inline uint32_t  parse_eight_digits_unrolled(uint64_t val) {
     48   const uint64_t mask = 0x000000FF000000FF;
     49   const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
     50   const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
     51   val -= 0x3030303030303030;
     52   val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
     53   val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
     54   return uint32_t(val);
     55 }
     56 
     57 fastfloat_really_inline uint32_t parse_eight_digits_unrolled(const char *chars)  noexcept  {
     58   return parse_eight_digits_unrolled(read_u64(chars));
     59 }
     60 
     61 // credit @aqrit
     62 fastfloat_really_inline bool is_made_of_eight_digits_fast(uint64_t val)  noexcept  {
     63   return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) &
     64      0x8080808080808080));
     65 }
     66 
     67 fastfloat_really_inline bool is_made_of_eight_digits_fast(const char *chars)  noexcept  {
     68   return is_made_of_eight_digits_fast(read_u64(chars));
     69 }
     70 
     71 typedef span<const char> byte_span;
     72 
     73 struct parsed_number_string {
     74   int64_t exponent{0};
     75   uint64_t mantissa{0};
     76   const char *lastmatch{nullptr};
     77   bool negative{false};
     78   bool valid{false};
     79   bool too_many_digits{false};
     80   // contains the range of the significant digits
     81   byte_span integer{};  // non-nullable
     82   byte_span fraction{}; // nullable
     83 };
     84 
     85 // Assuming that you use no more than 19 digits, this will
     86 // parse an ASCII string.
     87 fastfloat_really_inline
     88 parsed_number_string parse_number_string(const char *p, const char *pend, parse_options options) noexcept {
     89   const chars_format fmt = options.format;
     90   const char decimal_point = options.decimal_point;
     91 
     92   parsed_number_string answer;
     93   answer.valid = false;
     94   answer.too_many_digits = false;
     95   answer.negative = (*p == '-');
     96   if (*p == '-') { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here
     97     ++p;
     98     if (p == pend) {
     99       return answer;
    100     }
    101     if (!is_integer(*p) && (*p != decimal_point)) { // a sign must be followed by an integer or the dot
    102       return answer;
    103     }
    104   }
    105   const char *const start_digits = p;
    106 
    107   uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad)
    108 
    109   while ((p != pend) && is_integer(*p)) {
    110     // a multiplication by 10 is cheaper than an arbitrary integer
    111     // multiplication
    112     i = 10 * i +
    113         uint64_t(*p - '0'); // might overflow, we will handle the overflow later
    114     ++p;
    115   }
    116   const char *const end_of_integer_part = p;
    117   int64_t digit_count = int64_t(end_of_integer_part - start_digits);
    118   answer.integer = byte_span(start_digits, size_t(digit_count));
    119   int64_t exponent = 0;
    120   if ((p != pend) && (*p == decimal_point)) {
    121     ++p;
    122     const char* before = p;
    123     // can occur at most twice without overflowing, but let it occur more, since
    124     // for integers with many digits, digit parsing is the primary bottleneck.
    125     while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) {
    126       i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok
    127       p += 8;
    128     }
    129     while ((p != pend) && is_integer(*p)) {
    130       uint8_t digit = uint8_t(*p - '0');
    131       ++p;
    132       i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
    133     }
    134     exponent = before - p;
    135     answer.fraction = byte_span(before, size_t(p - before));
    136     digit_count -= exponent;
    137   }
    138   // we must have encountered at least one integer!
    139   if (digit_count == 0) {
    140     return answer;
    141   }
    142   int64_t exp_number = 0;            // explicit exponential part
    143   if ((fmt & chars_format::scientific) && (p != pend) && (('e' == *p) || ('E' == *p))) {
    144     const char * location_of_e = p;
    145     ++p;
    146     bool neg_exp = false;
    147     if ((p != pend) && ('-' == *p)) {
    148       neg_exp = true;
    149       ++p;
    150     } else if ((p != pend) && ('+' == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1)
    151       ++p;
    152     }
    153     if ((p == pend) || !is_integer(*p)) {
    154       if(!(fmt & chars_format::fixed)) {
    155         // We are in error.
    156         return answer;
    157       }
    158       // Otherwise, we will be ignoring the 'e'.
    159       p = location_of_e;
    160     } else {
    161       while ((p != pend) && is_integer(*p)) {
    162         uint8_t digit = uint8_t(*p - '0');
    163         if (exp_number < 0x10000000) {
    164           exp_number = 10 * exp_number + digit;
    165         }
    166         ++p;
    167       }
    168       if(neg_exp) { exp_number = - exp_number; }
    169       exponent += exp_number;
    170     }
    171   } else {
    172     // If it scientific and not fixed, we have to bail out.
    173     if((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) { return answer; }
    174   }
    175   answer.lastmatch = p;
    176   answer.valid = true;
    177 
    178   // If we frequently had to deal with long strings of digits,
    179   // we could extend our code by using a 128-bit integer instead
    180   // of a 64-bit integer. However, this is uncommon.
    181   //
    182   // We can deal with up to 19 digits.
    183   if (digit_count > 19) { // this is uncommon
    184     // It is possible that the integer had an overflow.
    185     // We have to handle the case where we have 0.0000somenumber.
    186     // We need to be mindful of the case where we only have zeroes...
    187     // E.g., 0.000000000...000.
    188     const char *start = start_digits;
    189     while ((start != pend) && (*start == '0' || *start == decimal_point)) {
    190       if(*start == '0') { digit_count --; }
    191       start++;
    192     }
    193     if (digit_count > 19) {
    194       answer.too_many_digits = true;
    195       // Let us start again, this time, avoiding overflows.
    196       // We don't need to check if is_integer, since we use the
    197       // pre-tokenized spans from above.
    198       i = 0;
    199       p = answer.integer.ptr;
    200       const char* int_end = p + answer.integer.len();
    201       const uint64_t minimal_nineteen_digit_integer{1000000000000000000};
    202       while((i < minimal_nineteen_digit_integer) && (p != int_end)) {
    203         i = i * 10 + uint64_t(*p - '0');
    204         ++p;
    205       }
    206       if (i >= minimal_nineteen_digit_integer) { // We have a big integers
    207         exponent = end_of_integer_part - p + exp_number;
    208       } else { // We have a value with a fractional component.
    209           p = answer.fraction.ptr;
    210           const char* frac_end = p + answer.fraction.len();
    211           while((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
    212             i = i * 10 + uint64_t(*p - '0');
    213             ++p;
    214           }
    215           exponent = answer.fraction.ptr - p + exp_number;
    216       }
    217       // We have now corrected both exponent and i, to a truncated value
    218     }
    219   }
    220   answer.exponent = exponent;
    221   answer.mantissa = i;
    222   return answer;
    223 }
    224 
    225 } // namespace fast_float
    226 
    227 #endif
	duckstation duckstation, but archived from the revision just before upstream changed it to a proprietary software project, this version is the libre one
	git clone https://git.neptards.moe/u3shit/duckstation.git
	Log \| Files \| Refs \| README \| LICENSE