qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

json-parser.c (16328B)


      1 /*
      2  * JSON Parser
      3  *
      4  * Copyright IBM, Corp. 2009
      5  *
      6  * Authors:
      7  *  Anthony Liguori   <aliguori@us.ibm.com>
      8  *
      9  * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
     10  * See the COPYING.LIB file in the top-level directory.
     11  *
     12  */
     13 
     14 #include "qemu/osdep.h"
     15 #include "qemu/ctype.h"
     16 #include "qemu/cutils.h"
     17 #include "qemu/unicode.h"
     18 #include "qapi/error.h"
     19 #include "qapi/qmp/qbool.h"
     20 #include "qapi/qmp/qdict.h"
     21 #include "qapi/qmp/qlist.h"
     22 #include "qapi/qmp/qnull.h"
     23 #include "qapi/qmp/qnum.h"
     24 #include "qapi/qmp/qstring.h"
     25 #include "json-parser-int.h"
     26 
     27 struct JSONToken {          /* One token, as allocated by json_token() */
     28     JSONTokenType type;     /* token class; the parse_*() functions dispatch on it */
     29     int x;                  /* stored by json_token(), never read in this file; presumably a position - TODO confirm */
     30     int y;                  /* same as x: stored, never read here */
     31     char str[];             /* NUL-terminated token text (flexible array member) */
     32 };
     33 
     34 typedef struct JSONParserContext {  /* State of one json_parser_parse() run */
     35     Error *err;             /* first error recorded by parse_error(); NULL while none */
     36     JSONToken *current;     /* token most recently popped; freed by the next pop */
     37     GQueue *buf;            /* tokens not yet consumed */
     38     va_list *ap;            /* arguments for interpolation tokens; when NULL, parse_string() skips its '%' check */
     39 } JSONParserContext;
     40 
     41 #define BUG_ON(cond) assert(!(cond))   /* assertion fails when cond is true */
     42 
     43 /**
     44  * TODO
     45  *
     46  * 0) make errors meaningful again
     47  * 1) add geometry information to tokens
     48  * 3) should we return a parsed size?
     49  * 4) deal with premature EOI
     50  */
     51 
     52 static QObject *parse_value(JSONParserContext *ctxt);
     53 
     54 /**
     55  * Error handler
     56  */
     57 static void G_GNUC_PRINTF(3, 4) parse_error(JSONParserContext *ctxt,
     58                                            JSONToken *token, const char *msg, ...)
     59 {
     60     va_list ap;
     61     char message[1024];
     62 
     63     if (ctxt->err) {
     64         return;
     65     }
     66     va_start(ap, msg);
     67     vsnprintf(message, sizeof(message), msg, ap);
     68     va_end(ap);
     69     error_setg(&ctxt->err, "JSON parse error, %s", message);
     70 }
     71 
     72 static int cvt4hex(const char *s)
     73 {
     74     int cp, i;
     75 
     76     cp = 0;
     77     for (i = 0; i < 4; i++) {
     78         if (!qemu_isxdigit(s[i])) {
     79             return -1;
     80         }
     81         cp <<= 4;
     82         if (s[i] >= '0' && s[i] <= '9') {
     83             cp |= s[i] - '0';
     84         } else if (s[i] >= 'a' && s[i] <= 'f') {
     85             cp |= 10 + s[i] - 'a';
     86         } else if (s[i] >= 'A' && s[i] <= 'F') {
     87             cp |= 10 + s[i] - 'A';
     88         } else {
     89             return -1;
     90         }
     91     }
     92     return cp;
     93 }
     94 
     95 /**
     96  * parse_string(): Parse a JSON string
     97  *
     98  * From RFC 8259 "The JavaScript Object Notation (JSON) Data
     99  * Interchange Format":
    100  *
    101  *    char = unescaped /
    102  *        escape (
    103  *            %x22 /          ; "    quotation mark  U+0022
    104  *            %x5C /          ; \    reverse solidus U+005C
    105  *            %x2F /          ; /    solidus         U+002F
    106  *            %x62 /          ; b    backspace       U+0008
    107  *            %x66 /          ; f    form feed       U+000C
    108  *            %x6E /          ; n    line feed       U+000A
    109  *            %x72 /          ; r    carriage return U+000D
    110  *            %x74 /          ; t    tab             U+0009
    111  *            %x75 4HEXDIG )  ; uXXXX                U+XXXX
    112  *    escape = %x5C              ; \
    113  *    quotation-mark = %x22      ; "
    114  *    unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
    115  *
    116  * Extensions over RFC 8259:
    117  * - Extra escape sequence in strings:
    118  *   0x27 (apostrophe) is recognized after escape, too
    119  * - Single-quoted strings:
    120  *   Like double-quoted strings, except they're delimited by %x27
    121  *   (apostrophe) instead of %x22 (quotation mark), and can't contain
    122  *   unescaped apostrophe, but can contain unescaped quotation mark.
    123  *
    124  * Note:
    125  * - Encoding is modified UTF-8.
    126  * - Invalid Unicode characters are rejected.
    127  * - Control characters \x00..\x1F are rejected by the lexer.
    128  */
    129 static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
    130 {
    131     const char *ptr = token->str;
    132     GString *str;
    133     char quote;
    134     const char *beg;
    135     int cp, trailing;
    136     char *end;
    137     ssize_t len;
    138     char utf8_buf[5];
    139 
    140     assert(*ptr == '"' || *ptr == '\'');
    141     quote = *ptr++;
    142     str = g_string_new(NULL);
    143 
    144     while (*ptr != quote) {
    145         assert(*ptr);
    146         switch (*ptr) {
    147         case '\\':
    148             beg = ptr++;
    149             switch (*ptr++) {
    150             case '"':
    151                 g_string_append_c(str, '"');
    152                 break;
    153             case '\'':
    154                 g_string_append_c(str, '\'');
    155                 break;
    156             case '\\':
    157                 g_string_append_c(str, '\\');
    158                 break;
    159             case '/':
    160                 g_string_append_c(str, '/');
    161                 break;
    162             case 'b':
    163                 g_string_append_c(str, '\b');
    164                 break;
    165             case 'f':
    166                 g_string_append_c(str, '\f');
    167                 break;
    168             case 'n':
    169                 g_string_append_c(str, '\n');
    170                 break;
    171             case 'r':
    172                 g_string_append_c(str, '\r');
    173                 break;
    174             case 't':
    175                 g_string_append_c(str, '\t');
    176                 break;
    177             case 'u':
    178                 cp = cvt4hex(ptr);
    179                 ptr += 4;
    180 
    181                 /* handle surrogate pairs */
    182                 if (cp >= 0xD800 && cp <= 0xDBFF
    183                     && ptr[0] == '\\' && ptr[1] == 'u') {
    184                     /* leading surrogate followed by \u */
    185                     cp = 0x10000 + ((cp & 0x3FF) << 10);
    186                     trailing = cvt4hex(ptr + 2);
    187                     if (trailing >= 0xDC00 && trailing <= 0xDFFF) {
    188                         /* followed by trailing surrogate */
    189                         cp |= trailing & 0x3FF;
    190                         ptr += 6;
    191                     } else {
    192                         cp = -1; /* invalid */
    193                     }
    194                 }
    195 
    196                 if (mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp) < 0) {
    197                     parse_error(ctxt, token,
    198                                 "%.*s is not a valid Unicode character",
    199                                 (int)(ptr - beg), beg);
    200                     goto out;
    201                 }
    202                 g_string_append(str, utf8_buf);
    203                 break;
    204             default:
    205                 parse_error(ctxt, token, "invalid escape sequence in string");
    206                 goto out;
    207             }
    208             break;
    209         case '%':
    210             if (ctxt->ap) {
    211                 if (ptr[1] != '%') {
    212                     parse_error(ctxt, token, "can't interpolate into string");
    213                     goto out;
    214                 }
    215                 ptr++;
    216             }
    217             /* fall through */
    218         default:
    219             cp = mod_utf8_codepoint(ptr, 6, &end);
    220             if (cp < 0) {
    221                 parse_error(ctxt, token, "invalid UTF-8 sequence in string");
    222                 goto out;
    223             }
    224             ptr = end;
    225             len = mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp);
    226             assert(len >= 0);
    227             g_string_append(str, utf8_buf);
    228         }
    229     }
    230 
    231     return qstring_from_gstring(str);
    232 
    233 out:
    234     g_string_free(str, true);
    235     return NULL;
    236 }
    237 
    238 /* Note: the token object returned by parser_context_peek_token or
    239  * parser_context_pop_token is deleted as soon as parser_context_pop_token
    240  * is called again.
    241  */
    242 static JSONToken *parser_context_pop_token(JSONParserContext *ctxt)
    243 {
    244     g_free(ctxt->current);
    245     ctxt->current = g_queue_pop_head(ctxt->buf);
    246     return ctxt->current;
    247 }
    248 
    249 static JSONToken *parser_context_peek_token(JSONParserContext *ctxt)
    250 {
    251     return g_queue_peek_head(ctxt->buf);
    252 }
    253 
    254 /**
    255  * Parsing rules
    256  */
    257 static int parse_pair(JSONParserContext *ctxt, QDict *dict)
    258 {
    259     QObject *key_obj = NULL;
    260     QString *key;
    261     QObject *value;
    262     JSONToken *peek, *token;
    263 
    264     peek = parser_context_peek_token(ctxt);
    265     if (peek == NULL) {
    266         parse_error(ctxt, NULL, "premature EOI");
    267         goto out;
    268     }
    269 
    270     key_obj = parse_value(ctxt);
    271     key = qobject_to(QString, key_obj);
    272     if (!key) {
    273         parse_error(ctxt, peek, "key is not a string in object");
    274         goto out;
    275     }
    276 
    277     token = parser_context_pop_token(ctxt);
    278     if (token == NULL) {
    279         parse_error(ctxt, NULL, "premature EOI");
    280         goto out;
    281     }
    282 
    283     if (token->type != JSON_COLON) {
    284         parse_error(ctxt, token, "missing : in object pair");
    285         goto out;
    286     }
    287 
    288     value = parse_value(ctxt);
    289     if (value == NULL) {
    290         parse_error(ctxt, token, "Missing value in dict");
    291         goto out;
    292     }
    293 
    294     if (qdict_haskey(dict, qstring_get_str(key))) {
    295         parse_error(ctxt, token, "duplicate key");
    296         goto out;
    297     }
    298 
    299     qdict_put_obj(dict, qstring_get_str(key), value);
    300 
    301     qobject_unref(key_obj);
    302     return 0;
    303 
    304 out:
    305     qobject_unref(key_obj);
    306     return -1;
    307 }
    308 
    309 static QObject *parse_object(JSONParserContext *ctxt)
    310 {
    311     QDict *dict = NULL;
    312     JSONToken *token, *peek;
    313 
    314     token = parser_context_pop_token(ctxt);
    315     assert(token && token->type == JSON_LCURLY);
    316 
    317     dict = qdict_new();
    318 
    319     peek = parser_context_peek_token(ctxt);
    320     if (peek == NULL) {
    321         parse_error(ctxt, NULL, "premature EOI");
    322         goto out;
    323     }
    324 
    325     if (peek->type != JSON_RCURLY) {
    326         if (parse_pair(ctxt, dict) == -1) {
    327             goto out;
    328         }
    329 
    330         token = parser_context_pop_token(ctxt);
    331         if (token == NULL) {
    332             parse_error(ctxt, NULL, "premature EOI");
    333             goto out;
    334         }
    335 
    336         while (token->type != JSON_RCURLY) {
    337             if (token->type != JSON_COMMA) {
    338                 parse_error(ctxt, token, "expected separator in dict");
    339                 goto out;
    340             }
    341 
    342             if (parse_pair(ctxt, dict) == -1) {
    343                 goto out;
    344             }
    345 
    346             token = parser_context_pop_token(ctxt);
    347             if (token == NULL) {
    348                 parse_error(ctxt, NULL, "premature EOI");
    349                 goto out;
    350             }
    351         }
    352     } else {
    353         (void)parser_context_pop_token(ctxt);
    354     }
    355 
    356     return QOBJECT(dict);
    357 
    358 out:
    359     qobject_unref(dict);
    360     return NULL;
    361 }
    362 
    363 static QObject *parse_array(JSONParserContext *ctxt)
    364 {
    365     QList *list = NULL;
    366     JSONToken *token, *peek;
    367 
    368     token = parser_context_pop_token(ctxt);
    369     assert(token && token->type == JSON_LSQUARE);
    370 
    371     list = qlist_new();
    372 
    373     peek = parser_context_peek_token(ctxt);
    374     if (peek == NULL) {
    375         parse_error(ctxt, NULL, "premature EOI");
    376         goto out;
    377     }
    378 
    379     if (peek->type != JSON_RSQUARE) {
    380         QObject *obj;
    381 
    382         obj = parse_value(ctxt);
    383         if (obj == NULL) {
    384             parse_error(ctxt, token, "expecting value");
    385             goto out;
    386         }
    387 
    388         qlist_append_obj(list, obj);
    389 
    390         token = parser_context_pop_token(ctxt);
    391         if (token == NULL) {
    392             parse_error(ctxt, NULL, "premature EOI");
    393             goto out;
    394         }
    395 
    396         while (token->type != JSON_RSQUARE) {
    397             if (token->type != JSON_COMMA) {
    398                 parse_error(ctxt, token, "expected separator in list");
    399                 goto out;
    400             }
    401 
    402             obj = parse_value(ctxt);
    403             if (obj == NULL) {
    404                 parse_error(ctxt, token, "expecting value");
    405                 goto out;
    406             }
    407 
    408             qlist_append_obj(list, obj);
    409 
    410             token = parser_context_pop_token(ctxt);
    411             if (token == NULL) {
    412                 parse_error(ctxt, NULL, "premature EOI");
    413                 goto out;
    414             }
    415         }
    416     } else {
    417         (void)parser_context_pop_token(ctxt);
    418     }
    419 
    420     return QOBJECT(list);
    421 
    422 out:
    423     qobject_unref(list);
    424     return NULL;
    425 }
    426 
    427 static QObject *parse_keyword(JSONParserContext *ctxt)
    428 {
    429     JSONToken *token;
    430 
    431     token = parser_context_pop_token(ctxt);
    432     assert(token && token->type == JSON_KEYWORD);
    433 
    434     if (!strcmp(token->str, "true")) {
    435         return QOBJECT(qbool_from_bool(true));
    436     } else if (!strcmp(token->str, "false")) {
    437         return QOBJECT(qbool_from_bool(false));
    438     } else if (!strcmp(token->str, "null")) {
    439         return QOBJECT(qnull());
    440     }
    441     parse_error(ctxt, token, "invalid keyword '%s'", token->str);
    442     return NULL;
    443 }
    444 
    445 static QObject *parse_interpolation(JSONParserContext *ctxt)
    446 {
    447     JSONToken *token;
    448 
    449     token = parser_context_pop_token(ctxt);
    450     assert(token && token->type == JSON_INTERP);
    451 
    452     if (!strcmp(token->str, "%p")) {
    453         return va_arg(*ctxt->ap, QObject *);
    454     } else if (!strcmp(token->str, "%i")) {
    455         return QOBJECT(qbool_from_bool(va_arg(*ctxt->ap, int)));
    456     } else if (!strcmp(token->str, "%d")) {
    457         return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int)));
    458     } else if (!strcmp(token->str, "%ld")) {
    459         return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long)));
    460     } else if (!strcmp(token->str, "%lld")) {
    461         return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long long)));
    462     } else if (!strcmp(token->str, "%" PRId64)) {
    463         return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int64_t)));
    464     } else if (!strcmp(token->str, "%u")) {
    465         return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned int)));
    466     } else if (!strcmp(token->str, "%lu")) {
    467         return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long)));
    468     } else if (!strcmp(token->str, "%llu")) {
    469         return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long long)));
    470     } else if (!strcmp(token->str, "%" PRIu64)) {
    471         return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, uint64_t)));
    472     } else if (!strcmp(token->str, "%s")) {
    473         return QOBJECT(qstring_from_str(va_arg(*ctxt->ap, const char *)));
    474     } else if (!strcmp(token->str, "%f")) {
    475         return QOBJECT(qnum_from_double(va_arg(*ctxt->ap, double)));
    476     }
    477     parse_error(ctxt, token, "invalid interpolation '%s'", token->str);
    478     return NULL;
    479 }
    480 
    481 static QObject *parse_literal(JSONParserContext *ctxt)
    482 {
    483     JSONToken *token;
    484 
    485     token = parser_context_pop_token(ctxt);
    486     assert(token);
    487 
    488     switch (token->type) {
    489     case JSON_STRING:
    490         return QOBJECT(parse_string(ctxt, token));
    491     case JSON_INTEGER: {
    492         /*
    493          * Represent JSON_INTEGER as QNUM_I64 if possible, else as
    494          * QNUM_U64, else as QNUM_DOUBLE.  Note that qemu_strtoi64()
    495          * and qemu_strtou64() fail with ERANGE when it's not
    496          * possible.
    497          *
    498          * qnum_get_int() will then work for any signed 64-bit
    499          * JSON_INTEGER, qnum_get_uint() for any unsigned 64-bit
    500          * integer, and qnum_get_double() both for any JSON_INTEGER
    501          * and any JSON_FLOAT (with precision loss for integers beyond
    502          * 53 bits)
    503          */
    504         int ret;
    505         int64_t value;
    506         uint64_t uvalue;
    507 
    508         ret = qemu_strtoi64(token->str, NULL, 10, &value);
    509         if (!ret) {
    510             return QOBJECT(qnum_from_int(value));
    511         }
    512         assert(ret == -ERANGE);
    513 
    514         if (token->str[0] != '-') {
    515             ret = qemu_strtou64(token->str, NULL, 10, &uvalue);
    516             if (!ret) {
    517                 return QOBJECT(qnum_from_uint(uvalue));
    518             }
    519             assert(ret == -ERANGE);
    520         }
    521     }
    522     /* fall through to JSON_FLOAT */
    523     case JSON_FLOAT:
    524         /* FIXME dependent on locale; a pervasive issue in QEMU */
    525         /* FIXME our lexer matches RFC 8259 in forbidding Inf or NaN,
    526          * but those might be useful extensions beyond JSON */
    527         return QOBJECT(qnum_from_double(strtod(token->str, NULL)));
    528     default:
    529         abort();
    530     }
    531 }
    532 
    533 static QObject *parse_value(JSONParserContext *ctxt)
    534 {
    535     JSONToken *token;
    536 
    537     token = parser_context_peek_token(ctxt);
    538     if (token == NULL) {
    539         parse_error(ctxt, NULL, "premature EOI");
    540         return NULL;
    541     }
    542 
    543     switch (token->type) {
    544     case JSON_LCURLY:
    545         return parse_object(ctxt);
    546     case JSON_LSQUARE:
    547         return parse_array(ctxt);
    548     case JSON_INTERP:
    549         return parse_interpolation(ctxt);
    550     case JSON_INTEGER:
    551     case JSON_FLOAT:
    552     case JSON_STRING:
    553         return parse_literal(ctxt);
    554     case JSON_KEYWORD:
    555         return parse_keyword(ctxt);
    556     default:
    557         parse_error(ctxt, token, "expecting value");
    558         return NULL;
    559     }
    560 }
    561 
    562 JSONToken *json_token(JSONTokenType type, int x, int y, GString *tokstr)
    563 {
    564     JSONToken *token = g_malloc(sizeof(JSONToken) + tokstr->len + 1);
    565 
    566     token->type = type;
    567     memcpy(token->str, tokstr->str, tokstr->len);
    568     token->str[tokstr->len] = 0;
    569     token->x = x;
    570     token->y = y;
    571     return token;
    572 }
    573 
    574 QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp)
    575 {
    576     JSONParserContext ctxt = { .buf = tokens, .ap = ap };
    577     QObject *result;
    578 
    579     result = parse_value(&ctxt);
    580     assert(ctxt.err || g_queue_is_empty(ctxt.buf));
    581 
    582     error_propagate(errp, ctxt.err);
    583 
    584     while (!g_queue_is_empty(ctxt.buf)) {
    585         parser_context_pop_token(&ctxt);
    586     }
    587     g_free(ctxt.current);
    588 
    589     return result;
    590 }