diff options
Diffstat (limited to 'qobject/json-parser.c')
-rw-r--r-- | qobject/json-parser.c | 381 |
1 files changed, 183 insertions, 198 deletions
diff --git a/qobject/json-parser.c b/qobject/json-parser.c index a5aa790d62..5a840dfd86 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -13,6 +13,7 @@ #include "qemu/osdep.h" #include "qemu/cutils.h" +#include "qemu/unicode.h" #include "qapi/error.h" #include "qemu-common.h" #include "qapi/qmp/qbool.h" @@ -21,15 +22,21 @@ #include "qapi/qmp/qnull.h" #include "qapi/qmp/qnum.h" #include "qapi/qmp/qstring.h" -#include "qapi/qmp/json-parser.h" -#include "qapi/qmp/json-lexer.h" -#include "qapi/qmp/json-streamer.h" +#include "json-parser-int.h" + +struct JSONToken { + JSONTokenType type; + int x; + int y; + char str[]; +}; typedef struct JSONParserContext { Error *err; JSONToken *current; GQueue *buf; + va_list *ap; } JSONParserContext; #define BUG_ON(cond) assert(!(cond)) @@ -43,7 +50,7 @@ typedef struct JSONParserContext * 4) deal with premature EOI */ -static QObject *parse_value(JSONParserContext *ctxt, va_list *ap); +static QObject *parse_value(JSONParserContext *ctxt); /** * Error handler @@ -53,169 +60,170 @@ static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt, { va_list ap; char message[1024]; + + if (ctxt->err) { + return; + } va_start(ap, msg); vsnprintf(message, sizeof(message), msg, ap); va_end(ap); - if (ctxt->err) { - error_free(ctxt->err); - ctxt->err = NULL; - } error_setg(&ctxt->err, "JSON parse error, %s", message); } -/** - * String helpers - * - * These helpers are used to unescape strings. - */ -static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length) +static int cvt4hex(const char *s) { - if (wchar <= 0x007F) { - BUG_ON(buffer_length < 2); - - buffer[0] = wchar & 0x7F; - buffer[1] = 0; - } else if (wchar <= 0x07FF) { - BUG_ON(buffer_length < 3); - - buffer[0] = 0xC0 | ((wchar >> 6) & 0x1F); - buffer[1] = 0x80 | (wchar & 0x3F); - buffer[2] = 0; - } else { - BUG_ON(buffer_length < 4); + int cp, i; - buffer[0] = 0xE0 | ((wchar >> 12) & 0x0F); - buffer[1] = 0x80 | ((wchar >> 6) & 0x3F); - buffer[2] = 0x80 | (wchar & 0x3F); - buffer[3] = 0; - } -} - -static int hex2decimal(char ch) -{ - if (ch >= '0' && ch <= '9') { - return (ch - '0'); - } else if (ch >= 'a' && ch <= 'f') { - return 10 + (ch - 'a'); - } else if (ch >= 'A' && ch <= 'F') { - return 10 + (ch - 'A'); + cp = 0; + for (i = 0; i < 4; i++) { + if (!qemu_isxdigit(s[i])) { + return -1; + } + cp <<= 4; + if (s[i] >= '0' && s[i] <= '9') { + cp |= s[i] - '0'; + } else if (s[i] >= 'a' && s[i] <= 'f') { + cp |= 10 + s[i] - 'a'; + } else if (s[i] >= 'A' && s[i] <= 'F') { + cp |= 10 + s[i] - 'A'; + } else { + return -1; + } } - - return -1; + return cp; } /** - * parse_string(): Parse a json string and return a QObject + * parse_string(): Parse a JSON string + * + * From RFC 8259 "The JavaScript Object Notation (JSON) Data + * Interchange Format": + * + * char = unescaped / + * escape ( + * %x22 / ; " quotation mark U+0022 + * %x5C / ; \ reverse solidus U+005C + * %x2F / ; / solidus U+002F + * %x62 / ; b backspace U+0008 + * %x66 / ; f form feed U+000C + * %x6E / ; n line feed U+000A + * %x72 / ; r carriage return U+000D + * %x74 / ; t tab U+0009 + * %x75 4HEXDIG ) ; uXXXX U+XXXX + * escape = %x5C ; \ + * quotation-mark = %x22 ; " + * unescaped = %x20-21 / %x23-5B / %x5D-10FFFF + * + * Extensions over RFC 8259: + * - Extra escape sequence in strings: + * 0x27 (apostrophe) is recognized after escape, too + * - Single-quoted strings: + * Like double-quoted strings, except they're delimited by %x27 + * (apostrophe) instead of %x22 (quotation mark), and can't contain + * unescaped apostrophe, but can contain unescaped quotation mark. * - * string - * "" - * " chars " - * chars - * char - * char chars - * char - * any-Unicode-character- - * except-"-or-\-or- - * control-character - * \" - * \\ - * \/ - * \b - * \f - * \n - * \r - * \t - * \u four-hex-digits + * Note: + * - Encoding is modified UTF-8. + * - Invalid Unicode characters are rejected. + * - Control characters \x00..\x1F are rejected by the lexer. */ -static QString *qstring_from_escaped_str(JSONParserContext *ctxt, - JSONToken *token) +static QString *parse_string(JSONParserContext *ctxt, JSONToken *token) { const char *ptr = token->str; QString *str; - int double_quote = 1; - - if (*ptr == '"') { - double_quote = 1; - } else { - double_quote = 0; - } - ptr++; - + char quote; + const char *beg; + int cp, trailing; + char *end; + ssize_t len; + char utf8_buf[5]; + + assert(*ptr == '"' || *ptr == '\''); + quote = *ptr++; str = qstring_new(); - while (*ptr && - ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) { - if (*ptr == '\\') { - ptr++; - switch (*ptr) { + while (*ptr != quote) { + assert(*ptr); + switch (*ptr) { + case '\\': + beg = ptr++; + switch (*ptr++) { case '"': - qstring_append(str, "\""); - ptr++; + qstring_append_chr(str, '"'); break; case '\'': - qstring_append(str, "'"); - ptr++; + qstring_append_chr(str, '\''); break; case '\\': - qstring_append(str, "\\"); - ptr++; + qstring_append_chr(str, '\\'); break; case '/': - qstring_append(str, "/"); - ptr++; + qstring_append_chr(str, '/'); break; case 'b': - qstring_append(str, "\b"); - ptr++; + qstring_append_chr(str, '\b'); break; case 'f': - qstring_append(str, "\f"); - ptr++; + qstring_append_chr(str, '\f'); break; case 'n': - qstring_append(str, "\n"); - ptr++; + qstring_append_chr(str, '\n'); break; case 'r': - qstring_append(str, "\r"); - ptr++; + qstring_append_chr(str, '\r'); break; case 't': - qstring_append(str, "\t"); - ptr++; + qstring_append_chr(str, '\t'); break; - case 'u': { - uint16_t unicode_char = 0; - char utf8_char[4]; - int i = 0; - - ptr++; - - for (i = 0; i < 4; i++) { - if (qemu_isxdigit(*ptr)) { - unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4); + case 'u': + cp = cvt4hex(ptr); + ptr += 4; + + /* handle surrogate pairs */ + if (cp >= 0xD800 && cp <= 0xDBFF + && ptr[0] == '\\' && ptr[1] == 'u') { + /* leading surrogate followed by \u */ + cp = 0x10000 + ((cp & 0x3FF) << 10); + trailing = cvt4hex(ptr + 2); + if (trailing >= 0xDC00 && trailing <= 0xDFFF) { + /* followed by trailing surrogate */ + cp |= trailing & 0x3FF; + ptr += 6; } else { - parse_error(ctxt, token, - "invalid hex escape sequence in string"); - goto out; + cp = -1; /* invalid */ } - ptr++; } - wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char)); - qstring_append(str, utf8_char); - } break; + if (mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp) < 0) { + parse_error(ctxt, token, + "%.*s is not a valid Unicode character", + (int)(ptr - beg), beg); + goto out; + } + qstring_append(str, utf8_buf); + break; default: parse_error(ctxt, token, "invalid escape sequence in string"); goto out; } - } else { - char dummy[2]; - - dummy[0] = *ptr++; - dummy[1] = 0; - - qstring_append(str, dummy); + break; + case '%': + if (ctxt->ap && ptr[1] != '%') { + parse_error(ctxt, token, "can't interpolate into string"); + goto out; + } + ptr++; + /* fall through */ + default: + cp = mod_utf8_codepoint(ptr, 6, &end); + if (cp < 0) { + parse_error(ctxt, token, "invalid UTF-8 sequence in string"); + goto out; + } + ptr = end; + len = mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp); + assert(len >= 0); + qstring_append(str, utf8_buf); } } @@ -233,48 +241,19 @@ out: static JSONToken *parser_context_pop_token(JSONParserContext *ctxt) { g_free(ctxt->current); - assert(!g_queue_is_empty(ctxt->buf)); ctxt->current = g_queue_pop_head(ctxt->buf); return ctxt->current; } static JSONToken *parser_context_peek_token(JSONParserContext *ctxt) { - assert(!g_queue_is_empty(ctxt->buf)); return g_queue_peek_head(ctxt->buf); } -static JSONParserContext *parser_context_new(GQueue *tokens) -{ - JSONParserContext *ctxt; - - if (!tokens) { - return NULL; - } - - ctxt = g_malloc0(sizeof(JSONParserContext)); - ctxt->buf = tokens; - - return ctxt; -} - -/* to support error propagation, ctxt->err must be freed separately */ -static void parser_context_free(JSONParserContext *ctxt) -{ - if (ctxt) { - while (!g_queue_is_empty(ctxt->buf)) { - parser_context_pop_token(ctxt); - } - g_free(ctxt->current); - g_queue_free(ctxt->buf); - g_free(ctxt); - } -} - /** * Parsing rules */ -static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap) +static int parse_pair(JSONParserContext *ctxt, QDict *dict) { QObject *value; QString *key = NULL; @@ -286,7 +265,7 @@ static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap) goto out; } - key = qobject_to(QString, parse_value(ctxt, ap)); + key = qobject_to(QString, parse_value(ctxt)); if (!key) { parse_error(ctxt, peek, "key is not a string in object"); goto out; @@ -303,7 +282,7 @@ static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap) goto out; } - value = parse_value(ctxt, ap); + value = parse_value(ctxt); if (value == NULL) { parse_error(ctxt, token, "Missing value in dict"); goto out; @@ -321,7 +300,7 @@ out: return -1; } -static QObject *parse_object(JSONParserContext *ctxt, va_list *ap) +static QObject *parse_object(JSONParserContext *ctxt) { QDict *dict = NULL; JSONToken *token, *peek; @@ -338,7 +317,7 @@ static QObject *parse_object(JSONParserContext *ctxt, va_list *ap) } if (peek->type != JSON_RCURLY) { - if (parse_pair(ctxt, dict, ap) == -1) { + if (parse_pair(ctxt, dict) == -1) { goto out; } @@ -354,7 +333,7 @@ static QObject *parse_object(JSONParserContext *ctxt, va_list *ap) goto out; } - if (parse_pair(ctxt, dict, ap) == -1) { + if (parse_pair(ctxt, dict) == -1) { goto out; } @@ -375,7 +354,7 @@ out: return NULL; } -static QObject *parse_array(JSONParserContext *ctxt, va_list *ap) +static QObject *parse_array(JSONParserContext *ctxt) { QList *list = NULL; JSONToken *token, *peek; @@ -394,7 +373,7 @@ static QObject *parse_array(JSONParserContext *ctxt, va_list *ap) if (peek->type != JSON_RSQUARE) { QObject *obj; - obj = parse_value(ctxt, ap); + obj = parse_value(ctxt); if (obj == NULL) { parse_error(ctxt, token, "expecting value"); goto out; @@ -414,7 +393,7 @@ static QObject *parse_array(JSONParserContext *ctxt, va_list *ap) goto out; } - obj = parse_value(ctxt, ap); + obj = parse_value(ctxt); if (obj == NULL) { parse_error(ctxt, token, "expecting value"); goto out; @@ -457,40 +436,39 @@ static QObject *parse_keyword(JSONParserContext *ctxt) return NULL; } -static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap) +static QObject *parse_interpolation(JSONParserContext *ctxt) { JSONToken *token; - if (ap == NULL) { - return NULL; - } - token = parser_context_pop_token(ctxt); - assert(token && token->type == JSON_ESCAPE); + assert(token && token->type == JSON_INTERP); if (!strcmp(token->str, "%p")) { - return va_arg(*ap, QObject *); + return va_arg(*ctxt->ap, QObject *); } else if (!strcmp(token->str, "%i")) { - return QOBJECT(qbool_from_bool(va_arg(*ap, int))); + return QOBJECT(qbool_from_bool(va_arg(*ctxt->ap, int))); } else if (!strcmp(token->str, "%d")) { - return QOBJECT(qnum_from_int(va_arg(*ap, int))); + return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int))); } else if (!strcmp(token->str, "%ld")) { - return QOBJECT(qnum_from_int(va_arg(*ap, long))); - } else if (!strcmp(token->str, "%lld") || - !strcmp(token->str, "%I64d")) { - return QOBJECT(qnum_from_int(va_arg(*ap, long long))); + return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long))); + } else if (!strcmp(token->str, "%lld")) { + return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long long))); + } else if (!strcmp(token->str, "%" PRId64)) { + return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int64_t))); } else if (!strcmp(token->str, "%u")) { - return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned int))); + return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned int))); } else if (!strcmp(token->str, "%lu")) { - return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned long))); - } else if (!strcmp(token->str, "%llu") || - !strcmp(token->str, "%I64u")) { - return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned long long))); + return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long))); + } else if (!strcmp(token->str, "%llu")) { + return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long long))); + } else if (!strcmp(token->str, "%" PRIu64)) { + return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, uint64_t))); } else if (!strcmp(token->str, "%s")) { - return QOBJECT(qstring_from_str(va_arg(*ap, const char *))); + return QOBJECT(qstring_from_str(va_arg(*ctxt->ap, const char *))); } else if (!strcmp(token->str, "%f")) { - return QOBJECT(qnum_from_double(va_arg(*ap, double))); + return QOBJECT(qnum_from_double(va_arg(*ctxt->ap, double))); } + parse_error(ctxt, token, "invalid interpolation '%s'", token->str); return NULL; } @@ -503,7 +481,7 @@ static QObject *parse_literal(JSONParserContext *ctxt) switch (token->type) { case JSON_STRING: - return QOBJECT(qstring_from_escaped_str(ctxt, token)); + return QOBJECT(parse_string(ctxt, token)); case JSON_INTEGER: { /* * Represent JSON_INTEGER as QNUM_I64 if possible, else as @@ -538,7 +516,7 @@ static QObject *parse_literal(JSONParserContext *ctxt) } case JSON_FLOAT: /* FIXME dependent on locale; a pervasive issue in QEMU */ - /* FIXME our lexer matches RFC 7159 in forbidding Inf or NaN, + /* FIXME our lexer matches RFC 8259 in forbidding Inf or NaN, * but those might be useful extensions beyond JSON */ return QOBJECT(qnum_from_double(strtod(token->str, NULL))); default: @@ -546,7 +524,7 @@ static QObject *parse_literal(JSONParserContext *ctxt) } } -static QObject *parse_value(JSONParserContext *ctxt, va_list *ap) +static QObject *parse_value(JSONParserContext *ctxt) { JSONToken *token; @@ -558,11 +536,11 @@ static QObject *parse_value(JSONParserContext *ctxt, va_list *ap) switch (token->type) { case JSON_LCURLY: - return parse_object(ctxt, ap); + return parse_object(ctxt); case JSON_LSQUARE: - return parse_array(ctxt, ap); - case JSON_ESCAPE: - return parse_escape(ctxt, ap); + return parse_array(ctxt); + case JSON_INTERP: + return parse_interpolation(ctxt); case JSON_INTEGER: case JSON_FLOAT: case JSON_STRING: @@ -575,25 +553,32 @@ static QObject *parse_value(JSONParserContext *ctxt, va_list *ap) } } -QObject *json_parser_parse(GQueue *tokens, va_list *ap) +JSONToken *json_token(JSONTokenType type, int x, int y, GString *tokstr) { - return json_parser_parse_err(tokens, ap, NULL); + JSONToken *token = g_malloc(sizeof(JSONToken) + tokstr->len + 1); + + token->type = type; + memcpy(token->str, tokstr->str, tokstr->len); + token->str[tokstr->len] = 0; + token->x = x; + token->y = y; + return token; } -QObject *json_parser_parse_err(GQueue *tokens, va_list *ap, Error **errp) +QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp) { - JSONParserContext *ctxt = parser_context_new(tokens); + JSONParserContext ctxt = { .buf = tokens, .ap = ap }; QObject *result; - if (!ctxt) { - return NULL; - } - - result = parse_value(ctxt, ap); + result = parse_value(&ctxt); + assert(ctxt.err || g_queue_is_empty(ctxt.buf)); - error_propagate(errp, ctxt->err); + error_propagate(errp, ctxt.err); - parser_context_free(ctxt); + while (!g_queue_is_empty(ctxt.buf)) { + parser_context_pop_token(&ctxt); + } + g_free(ctxt.current); return result; } |