ascii_number.h (7763B)
1 #ifndef FASTFLOAT_ASCII_NUMBER_H 2 #define FASTFLOAT_ASCII_NUMBER_H 3 4 #include <cctype> 5 #include <cstdint> 6 #include <cstring> 7 #include <iterator> 8 9 #include "float_common.h" 10 11 namespace fast_float { 12 13 // Next function can be micro-optimized, but compilers are entirely 14 // able to optimize it well. 15 fastfloat_really_inline bool is_integer(char c) noexcept { return c >= '0' && c <= '9'; } 16 17 fastfloat_really_inline uint64_t byteswap(uint64_t val) { 18 return (val & 0xFF00000000000000) >> 56 19 | (val & 0x00FF000000000000) >> 40 20 | (val & 0x0000FF0000000000) >> 24 21 | (val & 0x000000FF00000000) >> 8 22 | (val & 0x00000000FF000000) << 8 23 | (val & 0x0000000000FF0000) << 24 24 | (val & 0x000000000000FF00) << 40 25 | (val & 0x00000000000000FF) << 56; 26 } 27 28 fastfloat_really_inline uint64_t read_u64(const char *chars) { 29 uint64_t val; 30 ::memcpy(&val, chars, sizeof(uint64_t)); 31 #if FASTFLOAT_IS_BIG_ENDIAN == 1 32 // Need to read as-if the number was in little-endian order. 33 val = byteswap(val); 34 #endif 35 return val; 36 } 37 38 fastfloat_really_inline void write_u64(uint8_t *chars, uint64_t val) { 39 #if FASTFLOAT_IS_BIG_ENDIAN == 1 40 // Need to read as-if the number was in little-endian order. 41 val = byteswap(val); 42 #endif 43 ::memcpy(chars, &val, sizeof(uint64_t)); 44 } 45 46 // credit @aqrit 47 fastfloat_really_inline uint32_t parse_eight_digits_unrolled(uint64_t val) { 48 const uint64_t mask = 0x000000FF000000FF; 49 const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32) 50 const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32) 51 val -= 0x3030303030303030; 52 val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8; 53 val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; 54 return uint32_t(val); 55 } 56 57 fastfloat_really_inline uint32_t parse_eight_digits_unrolled(const char *chars) noexcept { 58 return parse_eight_digits_unrolled(read_u64(chars)); 59 } 60 61 // credit @aqrit 62 fastfloat_really_inline bool is_made_of_eight_digits_fast(uint64_t val) noexcept { 63 return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) & 64 0x8080808080808080)); 65 } 66 67 fastfloat_really_inline bool is_made_of_eight_digits_fast(const char *chars) noexcept { 68 return is_made_of_eight_digits_fast(read_u64(chars)); 69 } 70 71 typedef span<const char> byte_span; 72 73 struct parsed_number_string { 74 int64_t exponent{0}; 75 uint64_t mantissa{0}; 76 const char *lastmatch{nullptr}; 77 bool negative{false}; 78 bool valid{false}; 79 bool too_many_digits{false}; 80 // contains the range of the significant digits 81 byte_span integer{}; // non-nullable 82 byte_span fraction{}; // nullable 83 }; 84 85 // Assuming that you use no more than 19 digits, this will 86 // parse an ASCII string. 87 fastfloat_really_inline 88 parsed_number_string parse_number_string(const char *p, const char *pend, parse_options options) noexcept { 89 const chars_format fmt = options.format; 90 const char decimal_point = options.decimal_point; 91 92 parsed_number_string answer; 93 answer.valid = false; 94 answer.too_many_digits = false; 95 answer.negative = (*p == '-'); 96 if (*p == '-') { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here 97 ++p; 98 if (p == pend) { 99 return answer; 100 } 101 if (!is_integer(*p) && (*p != decimal_point)) { // a sign must be followed by an integer or the dot 102 return answer; 103 } 104 } 105 const char *const start_digits = p; 106 107 uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad) 108 109 while ((p != pend) && is_integer(*p)) { 110 // a multiplication by 10 is cheaper than an arbitrary integer 111 // multiplication 112 i = 10 * i + 113 uint64_t(*p - '0'); // might overflow, we will handle the overflow later 114 ++p; 115 } 116 const char *const end_of_integer_part = p; 117 int64_t digit_count = int64_t(end_of_integer_part - start_digits); 118 answer.integer = byte_span(start_digits, size_t(digit_count)); 119 int64_t exponent = 0; 120 if ((p != pend) && (*p == decimal_point)) { 121 ++p; 122 const char* before = p; 123 // can occur at most twice without overflowing, but let it occur more, since 124 // for integers with many digits, digit parsing is the primary bottleneck. 125 while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) { 126 i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok 127 p += 8; 128 } 129 while ((p != pend) && is_integer(*p)) { 130 uint8_t digit = uint8_t(*p - '0'); 131 ++p; 132 i = i * 10 + digit; // in rare cases, this will overflow, but that's ok 133 } 134 exponent = before - p; 135 answer.fraction = byte_span(before, size_t(p - before)); 136 digit_count -= exponent; 137 } 138 // we must have encountered at least one integer! 139 if (digit_count == 0) { 140 return answer; 141 } 142 int64_t exp_number = 0; // explicit exponential part 143 if ((fmt & chars_format::scientific) && (p != pend) && (('e' == *p) || ('E' == *p))) { 144 const char * location_of_e = p; 145 ++p; 146 bool neg_exp = false; 147 if ((p != pend) && ('-' == *p)) { 148 neg_exp = true; 149 ++p; 150 } else if ((p != pend) && ('+' == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1) 151 ++p; 152 } 153 if ((p == pend) || !is_integer(*p)) { 154 if(!(fmt & chars_format::fixed)) { 155 // We are in error. 156 return answer; 157 } 158 // Otherwise, we will be ignoring the 'e'. 159 p = location_of_e; 160 } else { 161 while ((p != pend) && is_integer(*p)) { 162 uint8_t digit = uint8_t(*p - '0'); 163 if (exp_number < 0x10000000) { 164 exp_number = 10 * exp_number + digit; 165 } 166 ++p; 167 } 168 if(neg_exp) { exp_number = - exp_number; } 169 exponent += exp_number; 170 } 171 } else { 172 // If it scientific and not fixed, we have to bail out. 173 if((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) { return answer; } 174 } 175 answer.lastmatch = p; 176 answer.valid = true; 177 178 // If we frequently had to deal with long strings of digits, 179 // we could extend our code by using a 128-bit integer instead 180 // of a 64-bit integer. However, this is uncommon. 181 // 182 // We can deal with up to 19 digits. 183 if (digit_count > 19) { // this is uncommon 184 // It is possible that the integer had an overflow. 185 // We have to handle the case where we have 0.0000somenumber. 186 // We need to be mindful of the case where we only have zeroes... 187 // E.g., 0.000000000...000. 188 const char *start = start_digits; 189 while ((start != pend) && (*start == '0' || *start == decimal_point)) { 190 if(*start == '0') { digit_count --; } 191 start++; 192 } 193 if (digit_count > 19) { 194 answer.too_many_digits = true; 195 // Let us start again, this time, avoiding overflows. 196 // We don't need to check if is_integer, since we use the 197 // pre-tokenized spans from above. 198 i = 0; 199 p = answer.integer.ptr; 200 const char* int_end = p + answer.integer.len(); 201 const uint64_t minimal_nineteen_digit_integer{1000000000000000000}; 202 while((i < minimal_nineteen_digit_integer) && (p != int_end)) { 203 i = i * 10 + uint64_t(*p - '0'); 204 ++p; 205 } 206 if (i >= minimal_nineteen_digit_integer) { // We have a big integers 207 exponent = end_of_integer_part - p + exp_number; 208 } else { // We have a value with a fractional component. 209 p = answer.fraction.ptr; 210 const char* frac_end = p + answer.fraction.len(); 211 while((i < minimal_nineteen_digit_integer) && (p != frac_end)) { 212 i = i * 10 + uint64_t(*p - '0'); 213 ++p; 214 } 215 exponent = answer.fraction.ptr - p + exp_number; 216 } 217 // We have now corrected both exponent and i, to a truncated value 218 } 219 } 220 answer.exponent = exponent; 221 answer.mantissa = i; 222 return answer; 223 } 224 225 } // namespace fast_float 226 227 #endif