utf.cpp (2069B)
1 #include "c4/utf.hpp" 2 #include "c4/charconv.hpp" 3 4 namespace c4 { 5 6 C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast") 7 8 size_t decode_code_point(uint8_t *C4_RESTRICT buf, size_t buflen, const uint32_t code) 9 { 10 C4_UNUSED(buflen); 11 C4_ASSERT(buflen >= 4); 12 if (code <= UINT32_C(0x7f)) 13 { 14 buf[0] = (uint8_t)code; 15 return 1u; 16 } 17 else if(code <= UINT32_C(0x7ff)) 18 { 19 buf[0] = (uint8_t)(UINT32_C(0xc0) | (code >> 6)); /* 110xxxxx */ 20 buf[1] = (uint8_t)(UINT32_C(0x80) | (code & UINT32_C(0x3f))); /* 10xxxxxx */ 21 return 2u; 22 } 23 else if(code <= UINT32_C(0xffff)) 24 { 25 buf[0] = (uint8_t)(UINT32_C(0xe0) | ((code >> 12))); /* 1110xxxx */ 26 buf[1] = (uint8_t)(UINT32_C(0x80) | ((code >> 6) & UINT32_C(0x3f))); /* 10xxxxxx */ 27 buf[2] = (uint8_t)(UINT32_C(0x80) | ((code ) & UINT32_C(0x3f))); /* 10xxxxxx */ 28 return 3u; 29 } 30 else if(code <= UINT32_C(0x10ffff)) 31 { 32 buf[0] = (uint8_t)(UINT32_C(0xf0) | ((code >> 18))); /* 11110xxx */ 33 buf[1] = (uint8_t)(UINT32_C(0x80) | ((code >> 12) & UINT32_C(0x3f))); /* 10xxxxxx */ 34 buf[2] = (uint8_t)(UINT32_C(0x80) | ((code >> 6) & UINT32_C(0x3f))); /* 10xxxxxx */ 35 buf[3] = (uint8_t)(UINT32_C(0x80) | ((code ) & UINT32_C(0x3f))); /* 10xxxxxx */ 36 return 4u; 37 } 38 return 0; 39 } 40 41 substr decode_code_point(substr out, csubstr code_point) 42 { 43 C4_ASSERT(out.len >= 4); 44 C4_ASSERT(!code_point.begins_with("U+")); 45 C4_ASSERT(!code_point.begins_with("\\x")); 46 C4_ASSERT(!code_point.begins_with("\\u")); 47 C4_ASSERT(!code_point.begins_with("\\U")); 48 C4_ASSERT(!code_point.begins_with('0')); 49 C4_ASSERT(code_point.len <= 8); 50 C4_ASSERT(code_point.len > 0); 51 uint32_t code_point_val; 52 C4_CHECK(read_hex(code_point, &code_point_val)); 53 size_t ret = decode_code_point((uint8_t*)out.str, out.len, code_point_val); 54 C4_ASSERT(ret <= 4); 55 return out.first(ret); 56 } 57 58 C4_SUPPRESS_WARNING_GCC_CLANG_POP 59 60 } // namespace c4