exp.cpp (3232B)
1 #include <sstream> 2 3 #include "exp.h" 4 #include "stream.h" 5 #include "yaml-cpp/exceptions.h" // IWYU pragma: keep 6 7 namespace YAML { 8 struct Mark; 9 } // namespace YAML 10 11 namespace YAML { 12 namespace Exp { 13 unsigned ParseHex(const std::string& str, const Mark& mark) { 14 unsigned value = 0; 15 for (char ch : str) { 16 int digit = 0; 17 if ('a' <= ch && ch <= 'f') 18 digit = ch - 'a' + 10; 19 else if ('A' <= ch && ch <= 'F') 20 digit = ch - 'A' + 10; 21 else if ('0' <= ch && ch <= '9') 22 digit = ch - '0'; 23 else 24 throw ParserException(mark, ErrorMsg::INVALID_HEX); 25 26 value = (value << 4) + digit; 27 } 28 29 return value; 30 } 31 32 std::string Str(unsigned ch) { return std::string(1, static_cast<char>(ch)); } 33 34 // Escape 35 // . Translates the next 'codeLength' characters into a hex number and returns 36 // the result. 37 // . Throws if it's not actually hex. 38 std::string Escape(Stream& in, int codeLength) { 39 // grab string 40 std::string str; 41 for (int i = 0; i < codeLength; i++) 42 str += in.get(); 43 44 // get the value 45 unsigned value = ParseHex(str, in.mark()); 46 47 // legal unicode? 48 if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) { 49 std::stringstream msg; 50 msg << ErrorMsg::INVALID_UNICODE << value; 51 throw ParserException(in.mark(), msg.str()); 52 } 53 54 // now break it up into chars 55 if (value <= 0x7F) 56 return Str(value); 57 58 if (value <= 0x7FF) 59 return Str(0xC0 + (value >> 6)) + Str(0x80 + (value & 0x3F)); 60 61 if (value <= 0xFFFF) 62 return Str(0xE0 + (value >> 12)) + Str(0x80 + ((value >> 6) & 0x3F)) + 63 Str(0x80 + (value & 0x3F)); 64 65 return Str(0xF0 + (value >> 18)) + Str(0x80 + ((value >> 12) & 0x3F)) + 66 Str(0x80 + ((value >> 6) & 0x3F)) + Str(0x80 + (value & 0x3F)); 67 } 68 69 // Escape 70 // . Escapes the sequence starting 'in' (it must begin with a '\' or single 71 // quote) 72 // and returns the result. 73 // . Throws if it's an unknown escape character. 74 std::string Escape(Stream& in) { 75 // eat slash 76 char escape = in.get(); 77 78 // switch on escape character 79 char ch = in.get(); 80 81 // first do single quote, since it's easier 82 if (escape == '\'' && ch == '\'') 83 return "\'"; 84 85 // now do the slash (we're not gonna check if it's a slash - you better pass 86 // one!) 87 switch (ch) { 88 case '0': 89 return std::string(1, '\x00'); 90 case 'a': 91 return "\x07"; 92 case 'b': 93 return "\x08"; 94 case 't': 95 case '\t': 96 return "\x09"; 97 case 'n': 98 return "\x0A"; 99 case 'v': 100 return "\x0B"; 101 case 'f': 102 return "\x0C"; 103 case 'r': 104 return "\x0D"; 105 case 'e': 106 return "\x1B"; 107 case ' ': 108 return R"( )"; 109 case '\"': 110 return "\""; 111 case '\'': 112 return "\'"; 113 case '\\': 114 return "\\"; 115 case '/': 116 return "/"; 117 case 'N': 118 return "\x85"; 119 case '_': 120 return "\xA0"; 121 case 'L': 122 return "\xE2\x80\xA8"; // LS (#x2028) 123 case 'P': 124 return "\xE2\x80\xA9"; // PS (#x2029) 125 case 'x': 126 return Escape(in, 2); 127 case 'u': 128 return Escape(in, 4); 129 case 'U': 130 return Escape(in, 8); 131 } 132 133 std::stringstream msg; 134 throw ParserException(in.mark(), std::string(ErrorMsg::INVALID_ESCAPE) + ch); 135 } 136 } // namespace Exp 137 } // namespace YAML