diff options
Diffstat (limited to 'qobject')
-rw-r--r-- | qobject/json-lexer.c | 313 | ||||
-rw-r--r-- | qobject/json-parser-int.h | 54 | ||||
-rw-r--r-- | qobject/json-parser.c | 381 | ||||
-rw-r--r-- | qobject/json-streamer.c | 116 | ||||
-rw-r--r-- | qobject/qbool.c | 1 | ||||
-rw-r--r-- | qobject/qjson.c | 31 | ||||
-rw-r--r-- | qobject/qlist.c | 1 | ||||
-rw-r--r-- | qobject/qnull.c | 1 | ||||
-rw-r--r-- | qobject/qnum.c | 1 | ||||
-rw-r--r-- | qobject/qobject.c | 1 | ||||
-rw-r--r-- | qobject/qstring.c | 1 |
11 files changed, 455 insertions, 446 deletions
diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c index 980ba159d6..e1745a3d95 100644 --- a/qobject/json-lexer.c +++ b/qobject/json-lexer.c @@ -12,63 +12,116 @@ */ #include "qemu/osdep.h" -#include "qemu-common.h" -#include "qapi/qmp/json-lexer.h" +#include "json-parser-int.h" #define MAX_TOKEN_SIZE (64ULL << 20) /* - * Required by JSON (RFC 7159): + * From RFC 8259 "The JavaScript Object Notation (JSON) Data + * Interchange Format", with [comments in brackets]: * - * \"([^\\\"]|\\[\"'\\/bfnrt]|\\u[0-9a-fA-F]{4})*\" - * -?(0|[1-9][0-9]*)(.[0-9]+)?([eE][-+]?[0-9]+)? - * [{}\[\],:] - * [a-z]+ # covers null, true, false + * The set of tokens includes six structural characters, strings, + * numbers, and three literal names. * - * Extension of '' strings: + * These are the six structural characters: * - * '([^\\']|\\[\"'\\/bfnrt]|\\u[0-9a-fA-F]{4})*' + * begin-array = ws %x5B ws ; [ left square bracket + * begin-object = ws %x7B ws ; { left curly bracket + * end-array = ws %x5D ws ; ] right square bracket + * end-object = ws %x7D ws ; } right curly bracket + * name-separator = ws %x3A ws ; : colon + * value-separator = ws %x2C ws ; , comma * - * Extension for vararg handling in JSON construction: + * Insignificant whitespace is allowed before or after any of the six + * structural characters. + * [This lexer accepts it before or after any token, which is actually + * the same, as the grammar always has structural characters between + * other tokens.] * - * %((l|ll|I64)?d|[ipsf]) + * ws = *( + * %x20 / ; Space + * %x09 / ; Horizontal tab + * %x0A / ; Line feed or New line + * %x0D ) ; Carriage return * + * [...] three literal names: + * false null true + * [This lexer accepts [a-z]+, and leaves rejecting unknown literal + * names to the parser.] + * + * [Numbers:] + * + * number = [ minus ] int [ frac ] [ exp ] + * decimal-point = %x2E ; . + * digit1-9 = %x31-39 ; 1-9 + * e = %x65 / %x45 ; e E + * exp = e [ minus / plus ] 1*DIGIT + * frac = decimal-point 1*DIGIT + * int = zero / ( digit1-9 *DIGIT ) + * minus = %x2D ; - + * plus = %x2B ; + + * zero = %x30 ; 0 + * + * [Strings:] + * string = quotation-mark *char quotation-mark + * + * char = unescaped / + * escape ( + * %x22 / ; " quotation mark U+0022 + * %x5C / ; \ reverse solidus U+005C + * %x2F / ; / solidus U+002F + * %x62 / ; b backspace U+0008 + * %x66 / ; f form feed U+000C + * %x6E / ; n line feed U+000A + * %x72 / ; r carriage return U+000D + * %x74 / ; t tab U+0009 + * %x75 4HEXDIG ) ; uXXXX U+XXXX + * escape = %x5C ; \ + * quotation-mark = %x22 ; " + * unescaped = %x20-21 / %x23-5B / %x5D-10FFFF + * [This lexer accepts any non-control character after escape, and + * leaves rejecting invalid ones to the parser.] + * + * + * Extensions over RFC 8259: + * - Extra escape sequence in strings: + * 0x27 (apostrophe) is recognized after escape, too + * - Single-quoted strings: + * Like double-quoted strings, except they're delimited by %x27 + * (apostrophe) instead of %x22 (quotation mark), and can't contain + * unescaped apostrophe, but can contain unescaped quotation mark. + * - Interpolation, if enabled: + * The lexer accepts %[A-Za-z0-9]*, and leaves rejecting invalid + * ones to the parser. + * + * Note: + * - Input must be encoded in modified UTF-8. + * - Decoding and validating is left to the parser. */ enum json_lexer_state { IN_ERROR = 0, /* must really be 0, see json_lexer[] */ - IN_DQ_UCODE3, - IN_DQ_UCODE2, - IN_DQ_UCODE1, - IN_DQ_UCODE0, IN_DQ_STRING_ESCAPE, IN_DQ_STRING, - IN_SQ_UCODE3, - IN_SQ_UCODE2, - IN_SQ_UCODE1, - IN_SQ_UCODE0, IN_SQ_STRING_ESCAPE, IN_SQ_STRING, IN_ZERO, - IN_DIGITS, - IN_DIGIT, + IN_EXP_DIGITS, + IN_EXP_SIGN, IN_EXP_E, IN_MANTISSA, IN_MANTISSA_DIGITS, - IN_NONZERO_NUMBER, - IN_NEG_NONZERO_NUMBER, + IN_DIGITS, + IN_SIGN, IN_KEYWORD, - IN_ESCAPE, - IN_ESCAPE_L, - IN_ESCAPE_LL, - IN_ESCAPE_I, - IN_ESCAPE_I6, - IN_ESCAPE_I64, + IN_INTERP, IN_WHITESPACE, IN_START, + IN_START_INTERP, /* must be IN_START + 1 */ }; -QEMU_BUILD_BUG_ON((int)JSON_MIN <= (int)IN_START); +QEMU_BUILD_BUG_ON((int)JSON_MIN <= (int)IN_START_INTERP); +QEMU_BUILD_BUG_ON(IN_START_INTERP != IN_START + 1); #define TERMINAL(state) [0 ... 0x7F] = (state) @@ -76,87 +129,27 @@ QEMU_BUILD_BUG_ON((int)JSON_MIN <= (int)IN_START); from OLD_STATE required lookahead. This happens whenever the table below uses the TERMINAL macro. */ #define TERMINAL_NEEDED_LOOKAHEAD(old_state, terminal) \ - (json_lexer[(old_state)][0] == (terminal)) + (terminal != IN_ERROR && json_lexer[(old_state)][0] == (terminal)) static const uint8_t json_lexer[][256] = { /* Relies on default initialization to IN_ERROR! */ /* double quote string */ - [IN_DQ_UCODE3] = { - ['0' ... '9'] = IN_DQ_STRING, - ['a' ... 'f'] = IN_DQ_STRING, - ['A' ... 'F'] = IN_DQ_STRING, - }, - [IN_DQ_UCODE2] = { - ['0' ... '9'] = IN_DQ_UCODE3, - ['a' ... 'f'] = IN_DQ_UCODE3, - ['A' ... 'F'] = IN_DQ_UCODE3, - }, - [IN_DQ_UCODE1] = { - ['0' ... '9'] = IN_DQ_UCODE2, - ['a' ... 'f'] = IN_DQ_UCODE2, - ['A' ... 'F'] = IN_DQ_UCODE2, - }, - [IN_DQ_UCODE0] = { - ['0' ... '9'] = IN_DQ_UCODE1, - ['a' ... 'f'] = IN_DQ_UCODE1, - ['A' ... 'F'] = IN_DQ_UCODE1, - }, [IN_DQ_STRING_ESCAPE] = { - ['b'] = IN_DQ_STRING, - ['f'] = IN_DQ_STRING, - ['n'] = IN_DQ_STRING, - ['r'] = IN_DQ_STRING, - ['t'] = IN_DQ_STRING, - ['/'] = IN_DQ_STRING, - ['\\'] = IN_DQ_STRING, - ['\''] = IN_DQ_STRING, - ['\"'] = IN_DQ_STRING, - ['u'] = IN_DQ_UCODE0, + [0x20 ... 0xFD] = IN_DQ_STRING, }, [IN_DQ_STRING] = { - [1 ... 0xBF] = IN_DQ_STRING, - [0xC2 ... 0xF4] = IN_DQ_STRING, + [0x20 ... 0xFD] = IN_DQ_STRING, ['\\'] = IN_DQ_STRING_ESCAPE, ['"'] = JSON_STRING, }, /* single quote string */ - [IN_SQ_UCODE3] = { - ['0' ... '9'] = IN_SQ_STRING, - ['a' ... 'f'] = IN_SQ_STRING, - ['A' ... 'F'] = IN_SQ_STRING, - }, - [IN_SQ_UCODE2] = { - ['0' ... '9'] = IN_SQ_UCODE3, - ['a' ... 'f'] = IN_SQ_UCODE3, - ['A' ... 'F'] = IN_SQ_UCODE3, - }, - [IN_SQ_UCODE1] = { - ['0' ... '9'] = IN_SQ_UCODE2, - ['a' ... 'f'] = IN_SQ_UCODE2, - ['A' ... 'F'] = IN_SQ_UCODE2, - }, - [IN_SQ_UCODE0] = { - ['0' ... '9'] = IN_SQ_UCODE1, - ['a' ... 'f'] = IN_SQ_UCODE1, - ['A' ... 'F'] = IN_SQ_UCODE1, - }, [IN_SQ_STRING_ESCAPE] = { - ['b'] = IN_SQ_STRING, - ['f'] = IN_SQ_STRING, - ['n'] = IN_SQ_STRING, - ['r'] = IN_SQ_STRING, - ['t'] = IN_SQ_STRING, - ['/'] = IN_SQ_STRING, - ['\\'] = IN_SQ_STRING, - ['\''] = IN_SQ_STRING, - ['\"'] = IN_SQ_STRING, - ['u'] = IN_SQ_UCODE0, + [0x20 ... 0xFD] = IN_SQ_STRING, }, [IN_SQ_STRING] = { - [1 ... 0xBF] = IN_SQ_STRING, - [0xC2 ... 0xF4] = IN_SQ_STRING, + [0x20 ... 0xFD] = IN_SQ_STRING, ['\\'] = IN_SQ_STRING_ESCAPE, ['\''] = JSON_STRING, }, @@ -169,19 +162,19 @@ static const uint8_t json_lexer[][256] = { }, /* Float */ - [IN_DIGITS] = { + [IN_EXP_DIGITS] = { TERMINAL(JSON_FLOAT), - ['0' ... '9'] = IN_DIGITS, + ['0' ... '9'] = IN_EXP_DIGITS, }, - [IN_DIGIT] = { - ['0' ... '9'] = IN_DIGITS, + [IN_EXP_SIGN] = { + ['0' ... '9'] = IN_EXP_DIGITS, }, [IN_EXP_E] = { - ['-'] = IN_DIGIT, - ['+'] = IN_DIGIT, - ['0' ... '9'] = IN_DIGITS, + ['-'] = IN_EXP_SIGN, + ['+'] = IN_EXP_SIGN, + ['0' ... '9'] = IN_EXP_DIGITS, }, [IN_MANTISSA_DIGITS] = { @@ -196,17 +189,17 @@ static const uint8_t json_lexer[][256] = { }, /* Number */ - [IN_NONZERO_NUMBER] = { + [IN_DIGITS] = { TERMINAL(JSON_INTEGER), - ['0' ... '9'] = IN_NONZERO_NUMBER, + ['0' ... '9'] = IN_DIGITS, ['e'] = IN_EXP_E, ['E'] = IN_EXP_E, ['.'] = IN_MANTISSA, }, - [IN_NEG_NONZERO_NUMBER] = { + [IN_SIGN] = { ['0'] = IN_ZERO, - ['1' ... '9'] = IN_NONZERO_NUMBER, + ['1' ... '9'] = IN_DIGITS, }, /* keywords */ @@ -224,49 +217,25 @@ static const uint8_t json_lexer[][256] = { ['\n'] = IN_WHITESPACE, }, - /* escape */ - [IN_ESCAPE_LL] = { - ['d'] = JSON_ESCAPE, - ['u'] = JSON_ESCAPE, - }, - - [IN_ESCAPE_L] = { - ['d'] = JSON_ESCAPE, - ['l'] = IN_ESCAPE_LL, - ['u'] = JSON_ESCAPE, - }, - - [IN_ESCAPE_I64] = { - ['d'] = JSON_ESCAPE, - ['u'] = JSON_ESCAPE, - }, - - [IN_ESCAPE_I6] = { - ['4'] = IN_ESCAPE_I64, + /* interpolation */ + [IN_INTERP] = { + TERMINAL(JSON_INTERP), + ['A' ... 'Z'] = IN_INTERP, + ['a' ... 'z'] = IN_INTERP, + ['0' ... '9'] = IN_INTERP, }, - [IN_ESCAPE_I] = { - ['6'] = IN_ESCAPE_I6, - }, - - [IN_ESCAPE] = { - ['d'] = JSON_ESCAPE, - ['i'] = JSON_ESCAPE, - ['p'] = JSON_ESCAPE, - ['s'] = JSON_ESCAPE, - ['u'] = JSON_ESCAPE, - ['f'] = JSON_ESCAPE, - ['l'] = IN_ESCAPE_L, - ['I'] = IN_ESCAPE_I, - }, - - /* top level rule */ - [IN_START] = { + /* + * Two start states: + * - IN_START recognizes JSON tokens with our string extensions + * - IN_START_INTERP additionally recognizes interpolation. + */ + [IN_START ... IN_START_INTERP] = { ['"'] = IN_DQ_STRING, ['\''] = IN_SQ_STRING, ['0'] = IN_ZERO, - ['1' ... '9'] = IN_NONZERO_NUMBER, - ['-'] = IN_NEG_NONZERO_NUMBER, + ['1' ... '9'] = IN_DIGITS, + ['-'] = IN_SIGN, ['{'] = JSON_LCURLY, ['}'] = JSON_RCURLY, ['['] = JSON_LSQUARE, @@ -274,23 +243,23 @@ static const uint8_t json_lexer[][256] = { [','] = JSON_COMMA, [':'] = JSON_COLON, ['a' ... 'z'] = IN_KEYWORD, - ['%'] = IN_ESCAPE, [' '] = IN_WHITESPACE, ['\t'] = IN_WHITESPACE, ['\r'] = IN_WHITESPACE, ['\n'] = IN_WHITESPACE, }, + [IN_START_INTERP]['%'] = IN_INTERP, }; -void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func) +void json_lexer_init(JSONLexer *lexer, bool enable_interpolation) { - lexer->emit = func; - lexer->state = IN_START; + lexer->start_state = lexer->state = enable_interpolation + ? IN_START_INTERP : IN_START; lexer->token = g_string_sized_new(3); lexer->x = lexer->y = 0; } -static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) +static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) { int char_consumed, new_state; @@ -304,7 +273,7 @@ static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) assert(lexer->state <= ARRAY_SIZE(json_lexer)); new_state = json_lexer[lexer->state][(uint8_t)ch]; char_consumed = !TERMINAL_NEEDED_LOOKAHEAD(lexer->state, new_state); - if (char_consumed) { + if (char_consumed && !flush) { g_string_append_c(lexer->token, ch); } @@ -315,23 +284,23 @@ static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) case JSON_RSQUARE: case JSON_COLON: case JSON_COMMA: - case JSON_ESCAPE: + case JSON_INTERP: case JSON_INTEGER: case JSON_FLOAT: case JSON_KEYWORD: case JSON_STRING: - lexer->emit(lexer, lexer->token, new_state, lexer->x, lexer->y); + json_message_process_token(lexer, lexer->token, new_state, + lexer->x, lexer->y); /* fall through */ case JSON_SKIP: g_string_truncate(lexer->token, 0); - new_state = IN_START; + new_state = lexer->start_state; break; case IN_ERROR: /* XXX: To avoid having previous bad input leaving the parser in an * unresponsive state where we consume unpredictable amounts of * subsequent "good" input, percolate this error state up to the - * tokenizer/parser by forcing a NULL object to be emitted, then - * reset state. + * parser by emitting a JSON_ERROR token, then reset lexer state. * * Also note that this handling is required for reliable channel * negotiation between QMP and the guest agent, since chr(0xFF) @@ -340,11 +309,11 @@ static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) * never a valid ASCII/UTF-8 sequence, so this should reliably * induce an error/flush state. */ - lexer->emit(lexer, lexer->token, JSON_ERROR, lexer->x, lexer->y); + json_message_process_token(lexer, lexer->token, JSON_ERROR, + lexer->x, lexer->y); g_string_truncate(lexer->token, 0); - new_state = IN_START; - lexer->state = new_state; - return 0; + lexer->state = lexer->start_state; + return; default: break; } @@ -355,33 +324,29 @@ static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) * this is a security consideration. */ if (lexer->token->len > MAX_TOKEN_SIZE) { - lexer->emit(lexer, lexer->token, lexer->state, lexer->x, lexer->y); + json_message_process_token(lexer, lexer->token, lexer->state, + lexer->x, lexer->y); g_string_truncate(lexer->token, 0); - lexer->state = IN_START; + lexer->state = lexer->start_state; } - - return 0; } -int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size) +void json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size) { size_t i; for (i = 0; i < size; i++) { - int err; - - err = json_lexer_feed_char(lexer, buffer[i], false); - if (err < 0) { - return err; - } + json_lexer_feed_char(lexer, buffer[i], false); } - - return 0; } -int json_lexer_flush(JSONLexer *lexer) +void json_lexer_flush(JSONLexer *lexer) { - return lexer->state == IN_START ? 0 : json_lexer_feed_char(lexer, 0, true); + if (lexer->state != lexer->start_state) { + json_lexer_feed_char(lexer, 0, true); + } + json_message_process_token(lexer, lexer->token, JSON_END_OF_INPUT, + lexer->x, lexer->y); } void json_lexer_destroy(JSONLexer *lexer) diff --git a/qobject/json-parser-int.h b/qobject/json-parser-int.h new file mode 100644 index 0000000000..ceaa890ec6 --- /dev/null +++ b/qobject/json-parser-int.h @@ -0,0 +1,54 @@ +/* + * JSON Parser + * + * Copyright IBM, Corp. 2009 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef JSON_PARSER_INT_H +#define JSON_PARSER_INT_H + +#include "qapi/qmp/json-parser.h" + + +typedef enum json_token_type { + JSON_MIN = 100, + JSON_LCURLY = JSON_MIN, + JSON_RCURLY, + JSON_LSQUARE, + JSON_RSQUARE, + JSON_COLON, + JSON_COMMA, + JSON_INTEGER, + JSON_FLOAT, + JSON_KEYWORD, + JSON_STRING, + JSON_INTERP, + JSON_SKIP, + JSON_ERROR, + JSON_END_OF_INPUT, +} JSONTokenType; + +typedef struct JSONToken JSONToken; + +/* json-lexer.c */ +void json_lexer_init(JSONLexer *lexer, bool enable_interpolation); +void json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size); +void json_lexer_flush(JSONLexer *lexer); +void json_lexer_destroy(JSONLexer *lexer); + +/* json-streamer.c */ +void json_message_process_token(JSONLexer *lexer, GString *input, + JSONTokenType type, int x, int y); + +/* json-parser.c */ +JSONToken *json_token(JSONTokenType type, int x, int y, GString *tokstr); +QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp); + +#endif diff --git a/qobject/json-parser.c b/qobject/json-parser.c index a5aa790d62..5a840dfd86 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -13,6 +13,7 @@ #include "qemu/osdep.h" #include "qemu/cutils.h" +#include "qemu/unicode.h" #include "qapi/error.h" #include "qemu-common.h" #include "qapi/qmp/qbool.h" @@ -21,15 +22,21 @@ #include "qapi/qmp/qnull.h" #include "qapi/qmp/qnum.h" #include "qapi/qmp/qstring.h" -#include "qapi/qmp/json-parser.h" -#include "qapi/qmp/json-lexer.h" -#include "qapi/qmp/json-streamer.h" +#include "json-parser-int.h" + +struct JSONToken { + JSONTokenType type; + int x; + int y; + char str[]; +}; typedef struct JSONParserContext { Error *err; JSONToken *current; GQueue *buf; + va_list *ap; } JSONParserContext; #define BUG_ON(cond) assert(!(cond)) @@ -43,7 +50,7 @@ typedef struct JSONParserContext * 4) deal with premature EOI */ -static QObject *parse_value(JSONParserContext *ctxt, va_list *ap); +static QObject *parse_value(JSONParserContext *ctxt); /** * Error handler @@ -53,169 +60,170 @@ static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt, { va_list ap; char message[1024]; + + if (ctxt->err) { + return; + } va_start(ap, msg); vsnprintf(message, sizeof(message), msg, ap); va_end(ap); - if (ctxt->err) { - error_free(ctxt->err); - ctxt->err = NULL; - } error_setg(&ctxt->err, "JSON parse error, %s", message); } -/** - * String helpers - * - * These helpers are used to unescape strings. - */ -static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length) +static int cvt4hex(const char *s) { - if (wchar <= 0x007F) { - BUG_ON(buffer_length < 2); - - buffer[0] = wchar & 0x7F; - buffer[1] = 0; - } else if (wchar <= 0x07FF) { - BUG_ON(buffer_length < 3); - - buffer[0] = 0xC0 | ((wchar >> 6) & 0x1F); - buffer[1] = 0x80 | (wchar & 0x3F); - buffer[2] = 0; - } else { - BUG_ON(buffer_length < 4); + int cp, i; - buffer[0] = 0xE0 | ((wchar >> 12) & 0x0F); - buffer[1] = 0x80 | ((wchar >> 6) & 0x3F); - buffer[2] = 0x80 | (wchar & 0x3F); - buffer[3] = 0; - } -} - -static int hex2decimal(char ch) -{ - if (ch >= '0' && ch <= '9') { - return (ch - '0'); - } else if (ch >= 'a' && ch <= 'f') { - return 10 + (ch - 'a'); - } else if (ch >= 'A' && ch <= 'F') { - return 10 + (ch - 'A'); + cp = 0; + for (i = 0; i < 4; i++) { + if (!qemu_isxdigit(s[i])) { + return -1; + } + cp <<= 4; + if (s[i] >= '0' && s[i] <= '9') { + cp |= s[i] - '0'; + } else if (s[i] >= 'a' && s[i] <= 'f') { + cp |= 10 + s[i] - 'a'; + } else if (s[i] >= 'A' && s[i] <= 'F') { + cp |= 10 + s[i] - 'A'; + } else { + return -1; + } } - - return -1; + return cp; } /** - * parse_string(): Parse a json string and return a QObject + * parse_string(): Parse a JSON string + * + * From RFC 8259 "The JavaScript Object Notation (JSON) Data + * Interchange Format": + * + * char = unescaped / + * escape ( + * %x22 / ; " quotation mark U+0022 + * %x5C / ; \ reverse solidus U+005C + * %x2F / ; / solidus U+002F + * %x62 / ; b backspace U+0008 + * %x66 / ; f form feed U+000C + * %x6E / ; n line feed U+000A + * %x72 / ; r carriage return U+000D + * %x74 / ; t tab U+0009 + * %x75 4HEXDIG ) ; uXXXX U+XXXX + * escape = %x5C ; \ + * quotation-mark = %x22 ; " + * unescaped = %x20-21 / %x23-5B / %x5D-10FFFF + * + * Extensions over RFC 8259: + * - Extra escape sequence in strings: + * 0x27 (apostrophe) is recognized after escape, too + * - Single-quoted strings: + * Like double-quoted strings, except they're delimited by %x27 + * (apostrophe) instead of %x22 (quotation mark), and can't contain + * unescaped apostrophe, but can contain unescaped quotation mark. * - * string - * "" - * " chars " - * chars - * char - * char chars - * char - * any-Unicode-character- - * except-"-or-\-or- - * control-character - * \" - * \\ - * \/ - * \b - * \f - * \n - * \r - * \t - * \u four-hex-digits + * Note: + * - Encoding is modified UTF-8. + * - Invalid Unicode characters are rejected. + * - Control characters \x00..\x1F are rejected by the lexer. */ -static QString *qstring_from_escaped_str(JSONParserContext *ctxt, - JSONToken *token) +static QString *parse_string(JSONParserContext *ctxt, JSONToken *token) { const char *ptr = token->str; QString *str; - int double_quote = 1; - - if (*ptr == '"') { - double_quote = 1; - } else { - double_quote = 0; - } - ptr++; - + char quote; + const char *beg; + int cp, trailing; + char *end; + ssize_t len; + char utf8_buf[5]; + + assert(*ptr == '"' || *ptr == '\''); + quote = *ptr++; str = qstring_new(); - while (*ptr && - ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) { - if (*ptr == '\\') { - ptr++; - switch (*ptr) { + while (*ptr != quote) { + assert(*ptr); + switch (*ptr) { + case '\\': + beg = ptr++; + switch (*ptr++) { case '"': - qstring_append(str, "\""); - ptr++; + qstring_append_chr(str, '"'); break; case '\'': - qstring_append(str, "'"); - ptr++; + qstring_append_chr(str, '\''); break; case '\\': - qstring_append(str, "\\"); - ptr++; + qstring_append_chr(str, '\\'); break; case '/': - qstring_append(str, "/"); - ptr++; + qstring_append_chr(str, '/'); break; case 'b': - qstring_append(str, "\b"); - ptr++; + qstring_append_chr(str, '\b'); break; case 'f': - qstring_append(str, "\f"); - ptr++; + qstring_append_chr(str, '\f'); break; case 'n': - qstring_append(str, "\n"); - ptr++; + qstring_append_chr(str, '\n'); break; case 'r': - qstring_append(str, "\r"); - ptr++; + qstring_append_chr(str, '\r'); break; case 't': - qstring_append(str, "\t"); - ptr++; + qstring_append_chr(str, '\t'); break; - case 'u': { - uint16_t unicode_char = 0; - char utf8_char[4]; - int i = 0; - - ptr++; - - for (i = 0; i < 4; i++) { - if (qemu_isxdigit(*ptr)) { - unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4); + case 'u': + cp = cvt4hex(ptr); + ptr += 4; + + /* handle surrogate pairs */ + if (cp >= 0xD800 && cp <= 0xDBFF + && ptr[0] == '\\' && ptr[1] == 'u') { + /* leading surrogate followed by \u */ + cp = 0x10000 + ((cp & 0x3FF) << 10); + trailing = cvt4hex(ptr + 2); + if (trailing >= 0xDC00 && trailing <= 0xDFFF) { + /* followed by trailing surrogate */ + cp |= trailing & 0x3FF; + ptr += 6; } else { - parse_error(ctxt, token, - "invalid hex escape sequence in string"); - goto out; + cp = -1; /* invalid */ } - ptr++; } - wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char)); - qstring_append(str, utf8_char); - } break; + if (mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp) < 0) { + parse_error(ctxt, token, + "%.*s is not a valid Unicode character", + (int)(ptr - beg), beg); + goto out; + } + qstring_append(str, utf8_buf); + break; default: parse_error(ctxt, token, "invalid escape sequence in string"); goto out; } - } else { - char dummy[2]; - - dummy[0] = *ptr++; - dummy[1] = 0; - - qstring_append(str, dummy); + break; + case '%': + if (ctxt->ap && ptr[1] != '%') { + parse_error(ctxt, token, "can't interpolate into string"); + goto out; + } + ptr++; + /* fall through */ + default: + cp = mod_utf8_codepoint(ptr, 6, &end); + if (cp < 0) { + parse_error(ctxt, token, "invalid UTF-8 sequence in string"); + goto out; + } + ptr = end; + len = mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp); + assert(len >= 0); + qstring_append(str, utf8_buf); } } @@ -233,48 +241,19 @@ out: static JSONToken *parser_context_pop_token(JSONParserContext *ctxt) { g_free(ctxt->current); - assert(!g_queue_is_empty(ctxt->buf)); ctxt->current = g_queue_pop_head(ctxt->buf); return ctxt->current; } static JSONToken *parser_context_peek_token(JSONParserContext *ctxt) { - assert(!g_queue_is_empty(ctxt->buf)); return g_queue_peek_head(ctxt->buf); } -static JSONParserContext *parser_context_new(GQueue *tokens) -{ - JSONParserContext *ctxt; - - if (!tokens) { - return NULL; - } - - ctxt = g_malloc0(sizeof(JSONParserContext)); - ctxt->buf = tokens; - - return ctxt; -} - -/* to support error propagation, ctxt->err must be freed separately */ -static void parser_context_free(JSONParserContext *ctxt) -{ - if (ctxt) { - while (!g_queue_is_empty(ctxt->buf)) { - parser_context_pop_token(ctxt); - } - g_free(ctxt->current); - g_queue_free(ctxt->buf); - g_free(ctxt); - } -} - /** * Parsing rules */ -static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap) +static int parse_pair(JSONParserContext *ctxt, QDict *dict) { QObject *value; QString *key = NULL; @@ -286,7 +265,7 @@ static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap) goto out; } - key = qobject_to(QString, parse_value(ctxt, ap)); + key = qobject_to(QString, parse_value(ctxt)); if (!key) { parse_error(ctxt, peek, "key is not a string in object"); goto out; @@ -303,7 +282,7 @@ static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap) goto out; } - value = parse_value(ctxt, ap); + value = parse_value(ctxt); if (value == NULL) { parse_error(ctxt, token, "Missing value in dict"); goto out; @@ -321,7 +300,7 @@ out: return -1; } -static QObject *parse_object(JSONParserContext *ctxt, va_list *ap) +static QObject *parse_object(JSONParserContext *ctxt) { QDict *dict = NULL; JSONToken *token, *peek; @@ -338,7 +317,7 @@ static QObject *parse_object(JSONParserContext *ctxt, va_list *ap) } if (peek->type != JSON_RCURLY) { - if (parse_pair(ctxt, dict, ap) == -1) { + if (parse_pair(ctxt, dict) == -1) { goto out; } @@ -354,7 +333,7 @@ static QObject *parse_object(JSONParserContext *ctxt, va_list *ap) goto out; } - if (parse_pair(ctxt, dict, ap) == -1) { + if (parse_pair(ctxt, dict) == -1) { goto out; } @@ -375,7 +354,7 @@ out: return NULL; } -static QObject *parse_array(JSONParserContext *ctxt, va_list *ap) +static QObject *parse_array(JSONParserContext *ctxt) { QList *list = NULL; JSONToken *token, *peek; @@ -394,7 +373,7 @@ static QObject *parse_array(JSONParserContext *ctxt, va_list *ap) if (peek->type != JSON_RSQUARE) { QObject *obj; - obj = parse_value(ctxt, ap); + obj = parse_value(ctxt); if (obj == NULL) { parse_error(ctxt, token, "expecting value"); goto out; @@ -414,7 +393,7 @@ static QObject *parse_array(JSONParserContext *ctxt, va_list *ap) goto out; } - obj = parse_value(ctxt, ap); + obj = parse_value(ctxt); if (obj == NULL) { parse_error(ctxt, token, "expecting value"); goto out; @@ -457,40 +436,39 @@ static QObject *parse_keyword(JSONParserContext *ctxt) return NULL; } -static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap) +static QObject *parse_interpolation(JSONParserContext *ctxt) { JSONToken *token; - if (ap == NULL) { - return NULL; - } - token = parser_context_pop_token(ctxt); - assert(token && token->type == JSON_ESCAPE); + assert(token && token->type == JSON_INTERP); if (!strcmp(token->str, "%p")) { - return va_arg(*ap, QObject *); + return va_arg(*ctxt->ap, QObject *); } else if (!strcmp(token->str, "%i")) { - return QOBJECT(qbool_from_bool(va_arg(*ap, int))); + return QOBJECT(qbool_from_bool(va_arg(*ctxt->ap, int))); } else if (!strcmp(token->str, "%d")) { - return QOBJECT(qnum_from_int(va_arg(*ap, int))); + return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int))); } else if (!strcmp(token->str, "%ld")) { - return QOBJECT(qnum_from_int(va_arg(*ap, long))); - } else if (!strcmp(token->str, "%lld") || - !strcmp(token->str, "%I64d")) { - return QOBJECT(qnum_from_int(va_arg(*ap, long long))); + return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long))); + } else if (!strcmp(token->str, "%lld")) { + return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long long))); + } else if (!strcmp(token->str, "%" PRId64)) { + return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int64_t))); } else if (!strcmp(token->str, "%u")) { - return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned int))); + return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned int))); } else if (!strcmp(token->str, "%lu")) { - return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned long))); - } else if (!strcmp(token->str, "%llu") || - !strcmp(token->str, "%I64u")) { - return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned long long))); + return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long))); + } else if (!strcmp(token->str, "%llu")) { + return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long long))); + } else if (!strcmp(token->str, "%" PRIu64)) { + return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, uint64_t))); } else if (!strcmp(token->str, "%s")) { - return QOBJECT(qstring_from_str(va_arg(*ap, const char *))); + return QOBJECT(qstring_from_str(va_arg(*ctxt->ap, const char *))); } else if (!strcmp(token->str, "%f")) { - return QOBJECT(qnum_from_double(va_arg(*ap, double))); + return QOBJECT(qnum_from_double(va_arg(*ctxt->ap, double))); } + parse_error(ctxt, token, "invalid interpolation '%s'", token->str); return NULL; } @@ -503,7 +481,7 @@ static QObject *parse_literal(JSONParserContext *ctxt) switch (token->type) { case JSON_STRING: - return QOBJECT(qstring_from_escaped_str(ctxt, token)); + return QOBJECT(parse_string(ctxt, token)); case JSON_INTEGER: { /* * Represent JSON_INTEGER as QNUM_I64 if possible, else as @@ -538,7 +516,7 @@ static QObject *parse_literal(JSONParserContext *ctxt) } case JSON_FLOAT: /* FIXME dependent on locale; a pervasive issue in QEMU */ - /* FIXME our lexer matches RFC 7159 in forbidding Inf or NaN, + /* FIXME our lexer matches RFC 8259 in forbidding Inf or NaN, * but those might be useful extensions beyond JSON */ return QOBJECT(qnum_from_double(strtod(token->str, NULL))); default: @@ -546,7 +524,7 @@ static QObject *parse_literal(JSONParserContext *ctxt) } } -static QObject *parse_value(JSONParserContext *ctxt, va_list *ap) +static QObject *parse_value(JSONParserContext *ctxt) { JSONToken *token; @@ -558,11 +536,11 @@ static QObject *parse_value(JSONParserContext *ctxt, va_list *ap) switch (token->type) { case JSON_LCURLY: - return parse_object(ctxt, ap); + return parse_object(ctxt); case JSON_LSQUARE: - return parse_array(ctxt, ap); - case JSON_ESCAPE: - return parse_escape(ctxt, ap); + return parse_array(ctxt); + case JSON_INTERP: + return parse_interpolation(ctxt); case JSON_INTEGER: case JSON_FLOAT: case JSON_STRING: @@ -575,25 +553,32 @@ static QObject *parse_value(JSONParserContext *ctxt, va_list *ap) } } -QObject *json_parser_parse(GQueue *tokens, va_list *ap) +JSONToken *json_token(JSONTokenType type, int x, int y, GString *tokstr) { - return json_parser_parse_err(tokens, ap, NULL); + JSONToken *token = g_malloc(sizeof(JSONToken) + tokstr->len + 1); + + token->type = type; + memcpy(token->str, tokstr->str, tokstr->len); + token->str[tokstr->len] = 0; + token->x = x; + token->y = y; + return token; } -QObject *json_parser_parse_err(GQueue *tokens, va_list *ap, Error **errp) +QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp) { - JSONParserContext *ctxt = parser_context_new(tokens); + JSONParserContext ctxt = { .buf = tokens, .ap = ap }; QObject *result; - if (!ctxt) { - return NULL; - } - - result = parse_value(ctxt, ap); + result = parse_value(&ctxt); + assert(ctxt.err || g_queue_is_empty(ctxt.buf)); - error_propagate(errp, ctxt->err); + error_propagate(errp, ctxt.err); - parser_context_free(ctxt); + while (!g_queue_is_empty(ctxt.buf)) { + parser_context_pop_token(&ctxt); + } + g_free(ctxt.current); return result; } diff --git a/qobject/json-streamer.c b/qobject/json-streamer.c index c51c2021f9..47dd7ea576 100644 --- a/qobject/json-streamer.c +++ b/qobject/json-streamer.c @@ -12,34 +12,29 @@ */ #include "qemu/osdep.h" -#include "qemu-common.h" -#include "qapi/qmp/json-lexer.h" -#include "qapi/qmp/json-streamer.h" +#include "qapi/error.h" +#include "json-parser-int.h" #define MAX_TOKEN_SIZE (64ULL << 20) #define MAX_TOKEN_COUNT (2ULL << 20) -#define MAX_NESTING (1ULL << 10) - -static void json_message_free_token(void *token, void *opaque) -{ - g_free(token); -} +#define MAX_NESTING (1 << 10) static void json_message_free_tokens(JSONMessageParser *parser) { - if (parser->tokens) { - g_queue_foreach(parser->tokens, json_message_free_token, NULL); - g_queue_free(parser->tokens); - parser->tokens = NULL; + JSONToken *token; + + while ((token = g_queue_pop_head(&parser->tokens))) { + g_free(token); } } -static void json_message_process_token(JSONLexer *lexer, GString *input, - JSONTokenType type, int x, int y) +void json_message_process_token(JSONLexer *lexer, GString *input, + JSONTokenType type, int x, int y) { JSONMessageParser *parser = container_of(lexer, JSONMessageParser, lexer); + QObject *json = NULL; + Error *err = NULL; JSONToken *token; - GQueue *tokens; switch (type) { case JSON_LCURLY: @@ -54,79 +49,82 @@ static void json_message_process_token(JSONLexer *lexer, GString *input, case JSON_RSQUARE: parser->bracket_count--; break; + case JSON_ERROR: + error_setg(&err, "JSON parse error, stray '%s'", input->str); + goto out_emit; + case JSON_END_OF_INPUT: + if (g_queue_is_empty(&parser->tokens)) { + return; + } + json = json_parser_parse(&parser->tokens, parser->ap, &err); + goto out_emit; default: break; } - token = g_malloc(sizeof(JSONToken) + input->len + 1); - token->type = type; - memcpy(token->str, input->str, input->len); - token->str[input->len] = 0; - token->x = x; - token->y = y; + /* + * Security consideration, we limit total memory allocated per object + * and the maximum recursion depth that a message can force. + */ + if (parser->token_size + input->len + 1 > MAX_TOKEN_SIZE) { + error_setg(&err, "JSON token size limit exceeded"); + goto out_emit; + } + if (g_queue_get_length(&parser->tokens) + 1 > MAX_TOKEN_COUNT) { + error_setg(&err, "JSON token count limit exceeded"); + goto out_emit; + } + if (parser->bracket_count + parser->brace_count > MAX_NESTING) { + error_setg(&err, "JSON nesting depth limit exceeded"); + goto out_emit; + } + token = json_token(type, x, y, input); parser->token_size += input->len; - g_queue_push_tail(parser->tokens, token); + g_queue_push_tail(&parser->tokens, token); - if (type == JSON_ERROR) { - goto out_emit_bad; - } else if (parser->brace_count < 0 || - parser->bracket_count < 0 || - (parser->brace_count == 0 && - parser->bracket_count == 0)) { - goto out_emit; - } else if (parser->token_size > MAX_TOKEN_SIZE || - g_queue_get_length(parser->tokens) > MAX_TOKEN_COUNT || - parser->bracket_count + parser->brace_count > MAX_NESTING) { - /* Security consideration, we limit total memory allocated per object - * and the maximum recursion depth that a message can force. - */ - goto out_emit_bad; + if ((parser->brace_count > 0 || parser->bracket_count > 0) + && parser->bracket_count >= 0 && parser->bracket_count >= 0) { + return; } - return; + json = json_parser_parse(&parser->tokens, parser->ap, &err); -out_emit_bad: - /* - * Clear out token list and tell the parser to emit an error - * indication by passing it a NULL list - */ - json_message_free_tokens(parser); out_emit: - /* send current list of tokens to parser and reset tokenizer */ parser->brace_count = 0; parser->bracket_count = 0; - /* parser->emit takes ownership of parser->tokens. Remove our own - * reference to parser->tokens before handing it out to parser->emit. - */ - tokens = parser->tokens; - parser->tokens = g_queue_new(); - parser->emit(parser, tokens); + json_message_free_tokens(parser); parser->token_size = 0; + parser->emit(parser->opaque, json, err); } void json_message_parser_init(JSONMessageParser *parser, - void (*func)(JSONMessageParser *, GQueue *)) + void (*emit)(void *opaque, QObject *json, + Error *err), + void *opaque, va_list *ap) { - parser->emit = func; + parser->emit = emit; + parser->opaque = opaque; + parser->ap = ap; parser->brace_count = 0; parser->bracket_count = 0; - parser->tokens = g_queue_new(); + g_queue_init(&parser->tokens); parser->token_size = 0; - json_lexer_init(&parser->lexer, json_message_process_token); + json_lexer_init(&parser->lexer, !!ap); } -int json_message_parser_feed(JSONMessageParser *parser, +void json_message_parser_feed(JSONMessageParser *parser, const char *buffer, size_t size) { - return json_lexer_feed(&parser->lexer, buffer, size); + json_lexer_feed(&parser->lexer, buffer, size); } -int json_message_parser_flush(JSONMessageParser *parser) +void json_message_parser_flush(JSONMessageParser *parser) { - return json_lexer_flush(&parser->lexer); + json_lexer_flush(&parser->lexer); + assert(g_queue_is_empty(&parser->tokens)); } void json_message_parser_destroy(JSONMessageParser *parser) diff --git a/qobject/qbool.c b/qobject/qbool.c index b58249925c..06dfc43498 100644 --- a/qobject/qbool.c +++ b/qobject/qbool.c @@ -13,7 +13,6 @@ #include "qemu/osdep.h" #include "qapi/qmp/qbool.h" -#include "qemu-common.h" /** * qbool_from_bool(): Create a new QBool from a bool diff --git a/qobject/qjson.c b/qobject/qjson.c index ab4040f235..db36101f3b 100644 --- a/qobject/qjson.c +++ b/qobject/qjson.c @@ -13,9 +13,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "qapi/qmp/json-lexer.h" #include "qapi/qmp/json-parser.h" -#include "qapi/qmp/json-streamer.h" #include "qapi/qmp/qjson.h" #include "qapi/qmp/qbool.h" #include "qapi/qmp/qdict.h" @@ -27,16 +25,29 @@ typedef struct JSONParsingState { JSONMessageParser parser; - va_list *ap; QObject *result; Error *err; } JSONParsingState; -static void parse_json(JSONMessageParser *parser, GQueue *tokens) +static void consume_json(void *opaque, QObject *json, Error *err) { - JSONParsingState *s = container_of(parser, JSONParsingState, parser); + JSONParsingState *s = opaque; - s->result = json_parser_parse_err(tokens, s->ap, &s->err); + assert(!json != !err); + assert(!s->result || !s->err); + + if (s->result) { + qobject_unref(s->result); + s->result = NULL; + error_setg(&s->err, "Expecting at most one JSON value"); + } + if (s->err) { + qobject_unref(json); + error_free(err); + return; + } + s->result = json; + s->err = err; } /* @@ -54,13 +65,15 @@ static QObject *qobject_from_jsonv(const char *string, va_list *ap, { JSONParsingState state = {}; - state.ap = ap; - - json_message_parser_init(&state.parser, parse_json); + json_message_parser_init(&state.parser, consume_json, &state, ap); json_message_parser_feed(&state.parser, string, strlen(string)); json_message_parser_flush(&state.parser); json_message_parser_destroy(&state.parser); + if (!state.result && !state.err) { + error_setg(&state.err, "Expecting a JSON value"); + } + error_propagate(errp, state.err); return state.result; } diff --git a/qobject/qlist.c b/qobject/qlist.c index 37c1c167f1..b3274af88b 100644 --- a/qobject/qlist.c +++ b/qobject/qlist.c @@ -17,7 +17,6 @@ #include "qapi/qmp/qnum.h" #include "qapi/qmp/qstring.h" #include "qemu/queue.h" -#include "qemu-common.h" /** * qlist_new(): Create a new QList diff --git a/qobject/qnull.c b/qobject/qnull.c index f6f55f11ea..00870a1824 100644 --- a/qobject/qnull.c +++ b/qobject/qnull.c @@ -11,7 +11,6 @@ */ #include "qemu/osdep.h" -#include "qemu-common.h" #include "qapi/qmp/qnull.h" QNull qnull_ = { diff --git a/qobject/qnum.c b/qobject/qnum.c index 1501c82832..7012fc57f2 100644 --- a/qobject/qnum.c +++ b/qobject/qnum.c @@ -14,7 +14,6 @@ #include "qemu/osdep.h" #include "qapi/qmp/qnum.h" -#include "qemu-common.h" /** * qnum_from_int(): Create a new QNum from an int64_t diff --git a/qobject/qobject.c b/qobject/qobject.c index cf4b7e229e..878dd76e79 100644 --- a/qobject/qobject.c +++ b/qobject/qobject.c @@ -8,7 +8,6 @@ */ #include "qemu/osdep.h" -#include "qemu-common.h" #include "qapi/qmp/qbool.h" #include "qapi/qmp/qnull.h" #include "qapi/qmp/qnum.h" diff --git a/qobject/qstring.c b/qobject/qstring.c index 0f1510e792..1c6897df00 100644 --- a/qobject/qstring.c +++ b/qobject/qstring.c @@ -12,7 +12,6 @@ #include "qemu/osdep.h" #include "qapi/qmp/qstring.h" -#include "qemu-common.h" /** * qstring_new(): Create a new empty QString |