From 956a104a6ccbc7c5599b84e05d9c438ca85623f8 Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Thu, 23 Aug 2018 18:39:29 +0200
Subject: check-qjson: Cover multiple JSON objects in same string

qobject_from_json() & friends misbehave when the JSON text has more
than one JSON value.  Add test coverage to demonstrate the bugs.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-3-armbru@redhat.com>
---
 tests/check-qjson.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'tests/check-qjson.c')

diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index eaf5d20663..cc952c56ea 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -1418,6 +1418,25 @@ static void limits_nesting(void)
     g_assert(obj == NULL);
 }
 
+static void multiple_values(void)
+{
+    Error *err = NULL;
+    QObject *obj;
+
+    /* BUG: yields the last value; the syntax tree for "false" leaks */
+    obj = qobject_from_json("false true", &err);
+    g_assert(qbool_get_bool(qobject_to(QBool, obj)));
+    g_assert(!err);
+    qobject_unref(obj);
+
+    /* BUG: simultaneously succeeds (returns true) and fails (sets err) */
+    /* BUG: calls json_parser_parse() with errp pointing to non-null */
+    obj = qobject_from_json("} true", &err);
+    g_assert(qbool_get_bool(qobject_to(QBool, obj)));
+    error_free_or_abort(&err);
+    qobject_unref(obj);
+}
+
 int main(int argc, char **argv)
 {
     g_test_init(&argc, &argv, NULL);
@@ -1455,6 +1474,7 @@ int main(int argc, char **argv)
     g_test_add_func("/errors/invalid_dict_comma", invalid_dict_comma);
     g_test_add_func("/errors/unterminated/literal", unterminated_literal);
     g_test_add_func("/errors/limits/nesting", limits_nesting);
+    g_test_add_func("/errors/multiple_values", multiple_values);
 
     return g_test_run();
 }
-- 
cgit v1.2.3-55-g7522


From a3694181e3049db7354613c8bb86bff5b0f70333 Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Thu, 23 Aug 2018 18:39:30 +0200
Subject: check-qjson: Cover blank and lexically erroneous input

qobject_from_json() can return null without setting an error on
lexical errors.  I call that a bug.  Add test coverage to demonstrate
it.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-4-armbru@redhat.com>
---
 tests/check-qjson.c | 40 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 37 insertions(+), 3 deletions(-)

(limited to 'tests/check-qjson.c')

diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index cc952c56ea..4daadc272b 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -1307,8 +1307,40 @@ static void simple_varargs(void)
 
 static void empty_input(void)
 {
-    const char *empty = "";
-    QObject *obj = qobject_from_json(empty, &error_abort);
+    QObject *obj = qobject_from_json("", &error_abort);
+    g_assert(obj == NULL);
+}
+
+static void blank_input(void)
+{
+    QObject *obj = qobject_from_json("\n ", &error_abort);
+    g_assert(obj == NULL);      /* whitespace-only input yields no value */
+}
+
+static void junk_input(void)
+{
+    /* Note: junk within strings is covered elsewhere */
+    Error *err = NULL;
+    QObject *obj;
+
+    obj = qobject_from_json("@", &err);
+    g_assert(!err);             /* BUG: null result, yet no error set */
+    g_assert(obj == NULL);
+
+    obj = qobject_from_json("[0\xFF]", &err);
+    error_free_or_abort(&err);  /* error is set, as it should be */
+    g_assert(obj == NULL);
+
+    obj = qobject_from_json("00", &err);
+    g_assert(!err);             /* BUG: null result, yet no error set */
+    g_assert(obj == NULL);
+
+    obj = qobject_from_json("[1e", &err);
+    g_assert(!err);             /* BUG: null result, yet no error set */
+    g_assert(obj == NULL);
+
+    obj = qobject_from_json("truer", &err);
+    error_free_or_abort(&err);  /* error is set, as it should be */
     g_assert(obj == NULL);
 }
 
@@ -1462,7 +1494,9 @@ int main(int argc, char **argv)
 
     g_test_add_func("/varargs/simple_varargs", simple_varargs);
 
-    g_test_add_func("/errors/empty_input", empty_input);
+    g_test_add_func("/errors/empty", empty_input);
+    g_test_add_func("/errors/blank", blank_input);
+    g_test_add_func("/errors/junk", junk_input);
     g_test_add_func("/errors/unterminated/string", unterminated_string);
     g_test_add_func("/errors/unterminated/escape", unterminated_escape);
     g_test_add_func("/errors/unterminated/sq_string", unterminated_sq_string);
-- 
cgit v1.2.3-55-g7522


From 5365490879199fbaa7fb4b3acf32e0624108e4d1 Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Thu, 23 Aug 2018 18:39:31 +0200
Subject: check-qjson: Cover whitespace more thoroughly

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-5-armbru@redhat.com>
---
 tests/check-qjson.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tests/check-qjson.c')

diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index 4daadc272b..188f683317 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -1236,7 +1236,7 @@ static void simple_whitespace(void)
                     })),
         },
         {
-            .encoded = " [ 43 , { 'h' : 'b' }, [ ], 42 ]",
+            .encoded = "\t[ 43 , { 'h' : 'b' },\r\n\t[ ], 42 ]\n",
             .decoded = QLIT_QLIST(((QLitObject[]){
                         QLIT_QNUM(43),
                         QLIT_QDICT(((QLitDictEntry[]){
-- 
cgit v1.2.3-55-g7522


From 4e1df9b73480420dbd5b2d22a42038079bb6e265 Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Thu, 23 Aug 2018 18:39:35 +0200
Subject: check-qjson: Cover escaped characters more thoroughly, part 1

escaped_string() first tests double quoted strings, then repeats a few
tests with single quotes.  Repeat all of them: store the strings to
test without quotes, and wrap them in either kind of quote for
testing.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-9-armbru@redhat.com>
---
 tests/check-qjson.c | 96 +++++++++++++++++++++++++++++++----------------------
 1 file changed, 57 insertions(+), 39 deletions(-)

(limited to 'tests/check-qjson.c')

diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index 188f683317..008b6e95e4 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -22,55 +22,73 @@
 #include "qapi/qmp/qstring.h"
 #include "qemu-common.h"
 
+static QString *from_json_str(const char *jstr, bool single, Error **errp)
+{
+    char quote = single ? '\'' : '"';   /* delimiter selected by @single */
+    char *qjstr = g_strdup_printf("%c%s%c", quote, jstr, quote);
+    QString *ret = qobject_to(QString, qobject_from_json(qjstr, errp));
+
+    g_free(qjstr);              /* quoted copy is only needed for parsing */
+    return ret;                 /* callers expect null for invalid @jstr */
+}
+
+static char *to_json_str(QString *str)
+{
+    QString *json = qobject_to_json(QOBJECT(str));
+    char *jstr;
+
+    if (!json) {
+        return NULL;
+    }
+    /* peel off the enclosing double quotes: skip one byte, drop two */
+    jstr = g_strndup(qstring_get_str(json) + 1,
+                     qstring_get_length(json) - 2);
+    qobject_unref(json);
+    return jstr;                /* caller releases it with g_free() */
+}
+
 static void escaped_string(void)
 {
-    int i;
     struct {
-        const char *encoded;
-        const char *decoded;
+        /* Content of JSON string to parse with qobject_from_json() */
+        const char *json_in;
+        /* Expected parse output; to unparse with qobject_to_json() */
+        const char *utf8_out;
         int skip;
     } test_cases[] = {
-        { "\"\\b\"", "\b" },
-        { "\"\\f\"", "\f" },
-        { "\"\\n\"", "\n" },
-        { "\"\\r\"", "\r" },
-        { "\"\\t\"", "\t" },
-        { "\"/\"", "/" },
-        { "\"\\/\"", "/", .skip = 1 },
-        { "\"\\\\\"", "\\" },
-        { "\"\\\"\"", "\"" },
-        { "\"hello world \\\"embedded string\\\"\"",
+        { "\\b", "\b" },
+        { "\\f", "\f" },
+        { "\\n", "\n" },
+        { "\\r", "\r" },
+        { "\\t", "\t" },
+        { "/", "/" },
+        { "\\/", "/", .skip = 1 },
+        { "\\\\", "\\" },
+        { "\\\"", "\"" },
+        { "hello world \\\"embedded string\\\"",
           "hello world \"embedded string\"" },
-        { "\"hello world\\nwith new line\"", "hello world\nwith new line" },
-        { "\"single byte utf-8 \\u0020\"", "single byte utf-8  ", .skip = 1 },
-        { "\"double byte utf-8 \\u00A2\"", "double byte utf-8 \xc2\xa2" },
-        { "\"triple byte utf-8 \\u20AC\"", "triple byte utf-8 \xe2\x82\xac" },
-        { "'\\b'", "\b", .skip = 1 },
-        { "'\\f'", "\f", .skip = 1 },
-        { "'\\n'", "\n", .skip = 1 },
-        { "'\\r'", "\r", .skip = 1 },
-        { "'\\t'", "\t", .skip = 1 },
-        { "'\\/'", "/", .skip = 1 },
-        { "'\\\\'", "\\", .skip = 1 },
+        { "hello world\\nwith new line", "hello world\nwith new line" },
+        { "single byte utf-8 \\u0020", "single byte utf-8  ", .skip = 1 },
+        { "double byte utf-8 \\u00A2", "double byte utf-8 \xc2\xa2" },
+        { "triple byte utf-8 \\u20AC", "triple byte utf-8 \xe2\x82\xac" },
         {}
     };
+    int i, j;
+    QString *cstr;
+    char *jstr;
 
-    for (i = 0; test_cases[i].encoded; i++) {
-        QObject *obj;
-        QString *str;
-
-        obj = qobject_from_json(test_cases[i].encoded, &error_abort);
-        str = qobject_to(QString, obj);
-        g_assert(str);
-        g_assert_cmpstr(qstring_get_str(str), ==, test_cases[i].decoded);
-
-        if (test_cases[i].skip == 0) {
-            str = qobject_to_json(obj);
-            g_assert_cmpstr(qstring_get_str(str), ==, test_cases[i].encoded);
-            qobject_unref(obj);
+    for (i = 0; test_cases[i].json_in; i++) {
+        for (j = 0; j < 2; j++) {
+            cstr = from_json_str(test_cases[i].json_in, j, &error_abort);
+            g_assert_cmpstr(qstring_get_try_str(cstr),
+                            ==, test_cases[i].utf8_out);
+            if (test_cases[i].skip == 0) {
+                jstr = to_json_str(cstr);
+                g_assert_cmpstr(jstr, ==, test_cases[i].json_in);
+                g_free(jstr);
+            }
+            qobject_unref(cstr);
         }
-
-        qobject_unref(str);
     }
 }
 
-- 
cgit v1.2.3-55-g7522


From f3cfdd3a30a4bd0158d255daeabde027b76da83f Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Thu, 23 Aug 2018 18:39:36 +0200
Subject: check-qjson: Streamline escaped_string()'s test strings

Merge a few closely related test strings, and drop a few redundant
ones.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-10-armbru@redhat.com>
---
 tests/check-qjson.c | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

(limited to 'tests/check-qjson.c')

diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index 008b6e95e4..880453a93b 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -56,18 +56,8 @@ static void escaped_string(void)
         const char *utf8_out;
         int skip;
     } test_cases[] = {
-        { "\\b", "\b" },
-        { "\\f", "\f" },
-        { "\\n", "\n" },
-        { "\\r", "\r" },
-        { "\\t", "\t" },
-        { "/", "/" },
+        { "\\b\\f\\n\\r\\t\\\\\\\"", "\b\f\n\r\t\\\"" },
         { "\\/", "/", .skip = 1 },
-        { "\\\\", "\\" },
-        { "\\\"", "\"" },
-        { "hello world \\\"embedded string\\\"",
-          "hello world \"embedded string\"" },
-        { "hello world\\nwith new line", "hello world\nwith new line" },
         { "single byte utf-8 \\u0020", "single byte utf-8  ", .skip = 1 },
         { "double byte utf-8 \\u00A2", "double byte utf-8 \xc2\xa2" },
         { "triple byte utf-8 \\u20AC", "triple byte utf-8 \xe2\x82\xac" },
-- 
cgit v1.2.3-55-g7522


From e0fe2a978e9a8c0a712afa5cfd5bc38e389ae30f Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Thu, 23 Aug 2018 18:39:37 +0200
Subject: check-qjson: Cover escaped characters more thoroughly, part 2

Cover escaped single quote, surrogates, invalid escapes, and
noncharacters.  This demonstrates that valid surrogate pairs are
misinterpreted, and invalid surrogates and noncharacters aren't
rejected.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-11-armbru@redhat.com>
---
 tests/check-qjson.c | 62 +++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 53 insertions(+), 9 deletions(-)

(limited to 'tests/check-qjson.c')

diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index 880453a93b..4bb4925673 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -57,10 +57,49 @@ static void escaped_string(void)
         int skip;
     } test_cases[] = {
         { "\\b\\f\\n\\r\\t\\\\\\\"", "\b\f\n\r\t\\\"" },
-        { "\\/", "/", .skip = 1 },
+        { "\\/\\'", "/'", .skip = 1 },
         { "single byte utf-8 \\u0020", "single byte utf-8  ", .skip = 1 },
         { "double byte utf-8 \\u00A2", "double byte utf-8 \xc2\xa2" },
         { "triple byte utf-8 \\u20AC", "triple byte utf-8 \xe2\x82\xac" },
+        { "quadruple byte utf-8 \\uD834\\uDD1E", /* U+1D11E */
+          /* bug: want \xF0\x9D\x84\x9E */
+          "quadruple byte utf-8 \xED\xA0\xB4\xED\xB4\x9E", .skip = 1 },
+        { "\\", NULL },
+        { "\\z", NULL },
+        { "\\ux", NULL },
+        { "\\u1x", NULL },
+        { "\\u12x", NULL },
+        { "\\u123x", NULL },
+        { "\\u12345", "\341\210\2645" },
+        { "\\u0000x", "x", .skip = 1}, /* bug: want \xC0\x80x */
+        { "unpaired leading surrogate \\uD800",
+          /* bug: not rejected */
+          "unpaired leading surrogate \355\240\200", .skip = 1 },
+        { "unpaired leading surrogate \\uD800\\uCAFE",
+          /* bug: not rejected */
+          "unpaired leading surrogate \355\240\200\354\253\276", .skip = 1 },
+        { "unpaired leading surrogate \\uD800\\uD801\\uDC02",
+          /* bug: not rejected */
+          "unpaired leading surrogate \355\240\200\355\240\201\355\260\202",
+          .skip = 1 },
+        { "unpaired trailing surrogate \\uDC00",
+          /* bug: not rejected */
+          "unpaired trailing surrogate \355\260\200", .skip = 1},
+        { "backward surrogate pair \\uDC00\\uD800",
+          /* bug: not rejected */
+          "backward surrogate pair \355\260\200\355\240\200", .skip = 1},
+        { "noncharacter U+FDD0 \\uFDD0",
+          /* bug: not rejected */
+          "noncharacter U+FDD0 \xEF\xB7\x90", .skip = 1},
+        { "noncharacter U+FDEF \\uFDEF",
+          /* bug: not rejected */
+          "noncharacter U+FDEF \xEF\xB7\xAF", .skip = 1},
+        { "noncharacter U+1FFFE \\uD83F\\uDFFE", /* pair D83F DFFE */
+          /* bug: not rejected; each half is encoded on its own */
+          "noncharacter U+1FFFE \xED\xA0\xBF\xED\xBF\xBE", .skip = 1},
+        { "noncharacter U+10FFFF \\uDBFF\\uDFFF", /* pair DBFF DFFF */
+          /* bug: not rejected; each half is encoded on its own */
+          "noncharacter U+10FFFF \xED\xAF\xBF\xED\xBF\xBF", .skip = 1},
         {}
     };
     int i, j;
@@ -69,15 +108,20 @@ static void escaped_string(void)
 
     for (i = 0; test_cases[i].json_in; i++) {
         for (j = 0; j < 2; j++) {
-            cstr = from_json_str(test_cases[i].json_in, j, &error_abort);
-            g_assert_cmpstr(qstring_get_try_str(cstr),
-                            ==, test_cases[i].utf8_out);
-            if (test_cases[i].skip == 0) {
-                jstr = to_json_str(cstr);
-                g_assert_cmpstr(jstr, ==, test_cases[i].json_in);
-                g_free(jstr);
+            if (test_cases[i].utf8_out) {
+                cstr = from_json_str(test_cases[i].json_in, j, &error_abort);
+                g_assert_cmpstr(qstring_get_try_str(cstr),
+                                ==, test_cases[i].utf8_out);
+                if (!test_cases[i].skip) {
+                    jstr = to_json_str(cstr);
+                    g_assert_cmpstr(jstr, ==, test_cases[i].json_in);
+                    g_free(jstr);
+                }
+                qobject_unref(cstr);
+            } else {
+                cstr = from_json_str(test_cases[i].json_in, j, NULL);
+                g_assert(!cstr);
             }
-            qobject_unref(cstr);
         }
     }
 }
-- 
cgit v1.2.3-55-g7522


From 069946f402de10f544e198b0b5c016e2a70e8dd4 Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Thu, 23 Aug 2018 18:39:38 +0200
Subject: check-qjson: Consolidate partly redundant string tests

simple_string() and single_quote_string() have become redundant with
escaped_string(), except for embedded single and double quotes.
Replace them by a test that covers just that.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-12-armbru@redhat.com>
---
 tests/check-qjson.c | 64 ++++++++++++-----------------------------------------
 1 file changed, 14 insertions(+), 50 deletions(-)

(limited to 'tests/check-qjson.c')

diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index 4bb4925673..c67ac72858 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -126,59 +126,24 @@ static void escaped_string(void)
     }
 }
 
-static void simple_string(void)
+static void string_with_quotes(void)
 {
-    int i;
-    struct {
-        const char *encoded;
-        const char *decoded;
-    } test_cases[] = {
-        { "\"hello world\"", "hello world" },
-        { "\"the quick brown fox jumped over the fence\"",
-          "the quick brown fox jumped over the fence" },
-        {}
+    const char *test_cases[] = {
+        "\"the bee's knees\"",  /* single quote within double quotes */
+        "'double quote \"'",    /* double quote within single quotes */
+        NULL                    /* end-of-table sentinel */
     };
-
-    for (i = 0; test_cases[i].encoded; i++) {
-        QObject *obj;
-        QString *str;
-
-        obj = qobject_from_json(test_cases[i].encoded, &error_abort);
-        str = qobject_to(QString, obj);
-        g_assert(str);
-        g_assert(strcmp(qstring_get_str(str), test_cases[i].decoded) == 0);
-
-        str = qobject_to_json(obj);
-        g_assert(strcmp(qstring_get_str(str), test_cases[i].encoded) == 0);
-
-        qobject_unref(obj);
-        
-        qobject_unref(str);
-    }
-}
-
-static void single_quote_string(void)
-{
     int i;
-    struct {
-        const char *encoded;
-        const char *decoded;
-    } test_cases[] = {
-        { "'hello world'", "hello world" },
-        { "'the quick brown fox \\' jumped over the fence'",
-          "the quick brown fox ' jumped over the fence" },
-        {}
-    };
-
-    for (i = 0; test_cases[i].encoded; i++) {
-        QObject *obj;
-        QString *str;
+    QString *str;
+    char *cstr;
 
-        obj = qobject_from_json(test_cases[i].encoded, &error_abort);
-        str = qobject_to(QString, obj);
+    for (i = 0; test_cases[i]; i++) {
+        str = qobject_to(QString,
+                         qobject_from_json(test_cases[i], &error_abort));
         g_assert(str);
-        g_assert(strcmp(qstring_get_str(str), test_cases[i].decoded) == 0);
-
+        cstr = g_strndup(test_cases[i] + 1, strlen(test_cases[i]) - 2);
+        g_assert_cmpstr(qstring_get_str(str), ==, cstr);
+        g_free(cstr);
         qobject_unref(str);
     }
 }
@@ -1525,10 +1490,9 @@ int main(int argc, char **argv)
 {
     g_test_init(&argc, &argv, NULL);
 
-    g_test_add_func("/literals/string/simple", simple_string);
     g_test_add_func("/literals/string/escaped", escaped_string);
+    g_test_add_func("/literals/string/quotes", string_with_quotes);
     g_test_add_func("/literals/string/utf8", utf8_string);
-    g_test_add_func("/literals/string/single_quote", single_quote_string);
     g_test_add_func("/literals/string/vararg", vararg_string);
 
     g_test_add_func("/literals/number/simple", simple_number);
-- 
cgit v1.2.3-55-g7522


From 6ad8444f6aadb8b43c78583cab09e8839c79305f Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Thu, 23 Aug 2018 18:39:39 +0200
Subject: check-qjson: Cover UTF-8 in single quoted strings

utf8_string() tests only double quoted strings.  Cover single quoted
strings, too: store the strings to test without quotes, then wrap them
in either kind of quote.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-13-armbru@redhat.com>
---
 tests/check-qjson.c | 429 ++++++++++++++++++++++++++--------------------------
 1 file changed, 215 insertions(+), 214 deletions(-)

(limited to 'tests/check-qjson.c')

diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index c67ac72858..b229bfabac 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -175,10 +175,14 @@ static void utf8_string(void)
      * http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
      */
     static const struct {
+        /* Content of JSON string to parse with qobject_from_json() */
         const char *json_in;
+        /* Expected parse output */
         const char *utf8_out;
-        const char *json_out;   /* defaults to @json_in */
-        const char *utf8_in;    /* defaults to @utf8_out */
+        /* Expected unparse output, defaults to @json_in */
+        const char *json_out;
+        /* Expected parse output for @json_out, defaults to @utf8_out */
+        const char *utf8_in;
     } test_cases[] = {
         /*
          * Bug markers used here:
@@ -196,72 +200,72 @@ static void utf8_string(void)
         /* 1  Some correct UTF-8 text */
         {
             /* a bit of German */
-            "\"Falsches \xC3\x9C" "ben von Xylophonmusik qu\xC3\xA4lt"
-            " jeden gr\xC3\xB6\xC3\x9F" "eren Zwerg.\"",
             "Falsches \xC3\x9C" "ben von Xylophonmusik qu\xC3\xA4lt"
             " jeden gr\xC3\xB6\xC3\x9F" "eren Zwerg.",
-            "\"Falsches \\u00DCben von Xylophonmusik qu\\u00E4lt"
-            " jeden gr\\u00F6\\u00DFeren Zwerg.\"",
+            "Falsches \xC3\x9C" "ben von Xylophonmusik qu\xC3\xA4lt"
+            " jeden gr\xC3\xB6\xC3\x9F" "eren Zwerg.",
+            "Falsches \\u00DCben von Xylophonmusik qu\\u00E4lt"
+            " jeden gr\\u00F6\\u00DFeren Zwerg.",
         },
         {
             /* a bit of Greek */
-            "\"\xCE\xBA\xE1\xBD\xB9\xCF\x83\xCE\xBC\xCE\xB5\"",
             "\xCE\xBA\xE1\xBD\xB9\xCF\x83\xCE\xBC\xCE\xB5",
-            "\"\\u03BA\\u1F79\\u03C3\\u03BC\\u03B5\"",
+            "\xCE\xBA\xE1\xBD\xB9\xCF\x83\xCE\xBC\xCE\xB5",
+            "\\u03BA\\u1F79\\u03C3\\u03BC\\u03B5",
         },
         /* 2  Boundary condition test cases */
         /* 2.1  First possible sequence of a certain length */
         /* 2.1.1  1 byte U+0000 */
         {
-            "\"\\u0000\"",
+            "\\u0000",
             "",                 /* bug: want overlong "\xC0\x80" */
-            "\"\\u0000\"",
+            "\\u0000",
             "\xC0\x80",
         },
         /* 2.1.2  2 bytes U+0080 */
         {
-            "\"\xC2\x80\"",
             "\xC2\x80",
-            "\"\\u0080\"",
+            "\xC2\x80",
+            "\\u0080",
         },
         /* 2.1.3  3 bytes U+0800 */
         {
-            "\"\xE0\xA0\x80\"",
             "\xE0\xA0\x80",
-            "\"\\u0800\"",
+            "\xE0\xA0\x80",
+            "\\u0800",
         },
         /* 2.1.4  4 bytes U+10000 */
         {
-            "\"\xF0\x90\x80\x80\"",
             "\xF0\x90\x80\x80",
-            "\"\\uD800\\uDC00\"",
+            "\xF0\x90\x80\x80",
+            "\\uD800\\uDC00",
         },
         /* 2.1.5  5 bytes U+200000 */
         {
-            "\"\xF8\x88\x80\x80\x80\"",
+            "\xF8\x88\x80\x80\x80",
             NULL,               /* bug: rejected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
             "\xF8\x88\x80\x80\x80",
         },
         /* 2.1.6  6 bytes U+4000000 */
         {
-            "\"\xFC\x84\x80\x80\x80\x80\"",
+            "\xFC\x84\x80\x80\x80\x80",
             NULL,               /* bug: rejected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
             "\xFC\x84\x80\x80\x80\x80",
         },
         /* 2.2  Last possible sequence of a certain length */
         /* 2.2.1  1 byte U+007F */
         {
-            "\"\x7F\"",
             "\x7F",
-            "\"\\u007F\"",
+            "\x7F",
+            "\\u007F",
         },
         /* 2.2.2  2 bytes U+07FF */
         {
-            "\"\xDF\xBF\"",
             "\xDF\xBF",
-            "\"\\u07FF\"",
+            "\xDF\xBF",
+            "\\u07FF",
         },
         /*
          * 2.2.3  3 bytes U+FFFC
@@ -273,122 +277,122 @@ static void utf8_string(void)
          * U+FFFC here.
          */
         {
-            "\"\xEF\xBF\xBC\"",
             "\xEF\xBF\xBC",
-            "\"\\uFFFC\"",
+            "\xEF\xBF\xBC",
+            "\\uFFFC",
         },
         /* 2.2.4  4 bytes U+1FFFFF */
         {
-            "\"\xF7\xBF\xBF\xBF\"",
+            "\xF7\xBF\xBF\xBF",
             NULL,               /* bug: rejected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
             "\xF7\xBF\xBF\xBF",
         },
         /* 2.2.5  5 bytes U+3FFFFFF */
         {
-            "\"\xFB\xBF\xBF\xBF\xBF\"",
+            "\xFB\xBF\xBF\xBF\xBF",
             NULL,               /* bug: rejected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
             "\xFB\xBF\xBF\xBF\xBF",
         },
         /* 2.2.6  6 bytes U+7FFFFFFF */
         {
-            "\"\xFD\xBF\xBF\xBF\xBF\xBF\"",
+            "\xFD\xBF\xBF\xBF\xBF\xBF",
             NULL,               /* bug: rejected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
             "\xFD\xBF\xBF\xBF\xBF\xBF",
         },
         /* 2.3  Other boundary conditions */
         {
             /* last one before surrogate range: U+D7FF */
-            "\"\xED\x9F\xBF\"",
             "\xED\x9F\xBF",
-            "\"\\uD7FF\"",
+            "\xED\x9F\xBF",
+            "\\uD7FF",
         },
         {
             /* first one after surrogate range: U+E000 */
-            "\"\xEE\x80\x80\"",
             "\xEE\x80\x80",
-            "\"\\uE000\"",
+            "\xEE\x80\x80",
+            "\\uE000",
         },
         {
             /* last one in BMP: U+FFFD */
-            "\"\xEF\xBF\xBD\"",
             "\xEF\xBF\xBD",
-            "\"\\uFFFD\"",
+            "\xEF\xBF\xBD",
+            "\\uFFFD",
         },
         {
             /* last one in last plane: U+10FFFD */
-            "\"\xF4\x8F\xBF\xBD\"",
             "\xF4\x8F\xBF\xBD",
-            "\"\\uDBFF\\uDFFD\""
+            "\xF4\x8F\xBF\xBD",
+            "\\uDBFF\\uDFFD"
         },
         {
             /* first one beyond Unicode range: U+110000 */
-            "\"\xF4\x90\x80\x80\"",
             "\xF4\x90\x80\x80",
-            "\"\\uFFFD\"",
+            "\xF4\x90\x80\x80",
+            "\\uFFFD",
         },
         /* 3  Malformed sequences */
         /* 3.1  Unexpected continuation bytes */
         /* 3.1.1  First continuation byte */
         {
-            "\"\x80\"",
+            "\x80",
             "\x80",             /* bug: not corrected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
         },
         /* 3.1.2  Last continuation byte */
         {
-            "\"\xBF\"",
+            "\xBF",
             "\xBF",             /* bug: not corrected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
         },
         /* 3.1.3  2 continuation bytes */
         {
-            "\"\x80\xBF\"",
+            "\x80\xBF",
             "\x80\xBF",         /* bug: not corrected */
-            "\"\\uFFFD\\uFFFD\"",
+            "\\uFFFD\\uFFFD",
         },
         /* 3.1.4  3 continuation bytes */
         {
-            "\"\x80\xBF\x80\"",
+            "\x80\xBF\x80",
             "\x80\xBF\x80",     /* bug: not corrected */
-            "\"\\uFFFD\\uFFFD\\uFFFD\"",
+            "\\uFFFD\\uFFFD\\uFFFD",
         },
         /* 3.1.5  4 continuation bytes */
         {
-            "\"\x80\xBF\x80\xBF\"",
+            "\x80\xBF\x80\xBF",
             "\x80\xBF\x80\xBF", /* bug: not corrected */
-            "\"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\"",
+            "\\uFFFD\\uFFFD\\uFFFD\\uFFFD",
         },
         /* 3.1.6  5 continuation bytes */
         {
-            "\"\x80\xBF\x80\xBF\x80\"",
+            "\x80\xBF\x80\xBF\x80",
             "\x80\xBF\x80\xBF\x80", /* bug: not corrected */
-            "\"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\"",
+            "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD",
         },
         /* 3.1.7  6 continuation bytes */
         {
-            "\"\x80\xBF\x80\xBF\x80\xBF\"",
+            "\x80\xBF\x80\xBF\x80\xBF",
             "\x80\xBF\x80\xBF\x80\xBF", /* bug: not corrected */
-            "\"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\"",
+            "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD",
         },
         /* 3.1.8  7 continuation bytes */
         {
-            "\"\x80\xBF\x80\xBF\x80\xBF\x80\"",
+            "\x80\xBF\x80\xBF\x80\xBF\x80",
             "\x80\xBF\x80\xBF\x80\xBF\x80", /* bug: not corrected */
-            "\"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\"",
+            "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD",
         },
         /* 3.1.9  Sequence of all 64 possible continuation bytes */
         {
-            "\"\x80\x81\x82\x83\x84\x85\x86\x87"
+            "\x80\x81\x82\x83\x84\x85\x86\x87"
             "\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F"
             "\x90\x91\x92\x93\x94\x95\x96\x97"
             "\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F"
             "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7"
             "\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF"
             "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7"
-            "\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\"",
+            "\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF",
              /* bug: not corrected */
             "\x80\x81\x82\x83\x84\x85\x86\x87"
             "\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F"
@@ -398,27 +402,27 @@ static void utf8_string(void)
             "\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF"
             "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7"
             "\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF",
-            "\"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
-            "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\""
+            "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
+            "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
         },
         /* 3.2  Lonely start characters */
         /* 3.2.1  All 32 first bytes of 2-byte sequences, followed by space */
         {
-            "\"\xC0 \xC1 \xC2 \xC3 \xC4 \xC5 \xC6 \xC7 "
+            "\xC0 \xC1 \xC2 \xC3 \xC4 \xC5 \xC6 \xC7 "
             "\xC8 \xC9 \xCA \xCB \xCC \xCD \xCE \xCF "
             "\xD0 \xD1 \xD2 \xD3 \xD4 \xD5 \xD6 \xD7 "
-            "\xD8 \xD9 \xDA \xDB \xDC \xDD \xDE \xDF \"",
+            "\xD8 \xD9 \xDA \xDB \xDC \xDD \xDE \xDF ",
             NULL,               /* bug: rejected */
-            "\"\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD "
             "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD "
             "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD "
-            "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \"",
+            "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD "
+            "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD ",
             "\xC0 \xC1 \xC2 \xC3 \xC4 \xC5 \xC6 \xC7 "
             "\xC8 \xC9 \xCA \xCB \xCC \xCD \xCE \xCF "
             "\xD0 \xD1 \xD2 \xD3 \xD4 \xD5 \xD6 \xD7 "
@@ -426,159 +430,159 @@ static void utf8_string(void)
         },
         /* 3.2.2  All 16 first bytes of 3-byte sequences, followed by space */
         {
-            "\"\xE0 \xE1 \xE2 \xE3 \xE4 \xE5 \xE6 \xE7 "
-            "\xE8 \xE9 \xEA \xEB \xEC \xED \xEE \xEF \"",
+            "\xE0 \xE1 \xE2 \xE3 \xE4 \xE5 \xE6 \xE7 "
+            "\xE8 \xE9 \xEA \xEB \xEC \xED \xEE \xEF ",
             /* bug: not corrected */
             "\xE0 \xE1 \xE2 \xE3 \xE4 \xE5 \xE6 \xE7 "
             "\xE8 \xE9 \xEA \xEB \xEC \xED \xEE \xEF ",
-            "\"\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD "
-            "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \"",
+            "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD "
+            "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD ",
         },
         /* 3.2.3  All 8 first bytes of 4-byte sequences, followed by space */
         {
-            "\"\xF0 \xF1 \xF2 \xF3 \xF4 \xF5 \xF6 \xF7 \"",
+            "\xF0 \xF1 \xF2 \xF3 \xF4 \xF5 \xF6 \xF7 ",
             NULL,               /* bug: rejected */
-            "\"\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \"",
+            "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD ",
             "\xF0 \xF1 \xF2 \xF3 \xF4 \xF5 \xF6 \xF7 ",
         },
         /* 3.2.4  All 4 first bytes of 5-byte sequences, followed by space */
         {
-            "\"\xF8 \xF9 \xFA \xFB \"",
+            "\xF8 \xF9 \xFA \xFB ",
             NULL,               /* bug: rejected */
-            "\"\\uFFFD \\uFFFD \\uFFFD \\uFFFD \"",
+            "\\uFFFD \\uFFFD \\uFFFD \\uFFFD ",
             "\xF8 \xF9 \xFA \xFB ",
         },
         /* 3.2.5  All 2 first bytes of 6-byte sequences, followed by space */
         {
-            "\"\xFC \xFD \"",
+            "\xFC \xFD ",
             NULL,               /* bug: rejected */
-            "\"\\uFFFD \\uFFFD \"",
+            "\\uFFFD \\uFFFD ",
             "\xFC \xFD ",
         },
         /* 3.3  Sequences with last continuation byte missing */
         /* 3.3.1  2-byte sequence with last byte missing (U+0000) */
         {
-            "\"\xC0\"",
+            "\xC0",
             NULL,               /* bug: rejected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
             "\xC0",
         },
         /* 3.3.2  3-byte sequence with last byte missing (U+0000) */
         {
-            "\"\xE0\x80\"",
+            "\xE0\x80",
             "\xE0\x80",           /* bug: not corrected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
         },
         /* 3.3.3  4-byte sequence with last byte missing (U+0000) */
         {
-            "\"\xF0\x80\x80\"",
+            "\xF0\x80\x80",
             "\xF0\x80\x80",     /* bug: not corrected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
         },
         /* 3.3.4  5-byte sequence with last byte missing (U+0000) */
         {
-            "\"\xF8\x80\x80\x80\"",
+            "\xF8\x80\x80\x80",
             NULL,                   /* bug: rejected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
             "\xF8\x80\x80\x80",
         },
         /* 3.3.5  6-byte sequence with last byte missing (U+0000) */
         {
-            "\"\xFC\x80\x80\x80\x80\"",
+            "\xFC\x80\x80\x80\x80",
             NULL,                        /* bug: rejected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
             "\xFC\x80\x80\x80\x80",
         },
         /* 3.3.6  2-byte sequence with last byte missing (U+07FF) */
         {
-            "\"\xDF\"",
+            "\xDF",
             "\xDF",             /* bug: not corrected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
         },
         /* 3.3.7  3-byte sequence with last byte missing (U+FFFF) */
         {
-            "\"\xEF\xBF\"",
+            "\xEF\xBF",
             "\xEF\xBF",           /* bug: not corrected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
         },
         /* 3.3.8  4-byte sequence with last byte missing (U+1FFFFF) */
         {
-            "\"\xF7\xBF\xBF\"",
+            "\xF7\xBF\xBF",
             NULL,               /* bug: rejected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
             "\xF7\xBF\xBF",
         },
         /* 3.3.9  5-byte sequence with last byte missing (U+3FFFFFF) */
         {
-            "\"\xFB\xBF\xBF\xBF\"",
+            "\xFB\xBF\xBF\xBF",
             NULL,                 /* bug: rejected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
             "\xFB\xBF\xBF\xBF",
         },
         /* 3.3.10  6-byte sequence with last byte missing (U+7FFFFFFF) */
         {
-            "\"\xFD\xBF\xBF\xBF\xBF\"",
+            "\xFD\xBF\xBF\xBF\xBF",
             NULL,                        /* bug: rejected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
             "\xFD\xBF\xBF\xBF\xBF",
         },
         /* 3.4  Concatenation of incomplete sequences */
         {
-            "\"\xC0\xE0\x80\xF0\x80\x80\xF8\x80\x80\x80\xFC\x80\x80\x80\x80"
-            "\xDF\xEF\xBF\xF7\xBF\xBF\xFB\xBF\xBF\xBF\xFD\xBF\xBF\xBF\xBF\"",
+            "\xC0\xE0\x80\xF0\x80\x80\xF8\x80\x80\x80\xFC\x80\x80\x80\x80"
+            "\xDF\xEF\xBF\xF7\xBF\xBF\xFB\xBF\xBF\xBF\xFD\xBF\xBF\xBF\xBF",
             NULL,               /* bug: rejected */
-            "\"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
-            "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\"",
+            "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
+            "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD",
             "\xC0\xE0\x80\xF0\x80\x80\xF8\x80\x80\x80\xFC\x80\x80\x80\x80"
             "\xDF\xEF\xBF\xF7\xBF\xBF\xFB\xBF\xBF\xBF\xFD\xBF\xBF\xBF\xBF",
         },
         /* 3.5  Impossible bytes */
         {
-            "\"\xFE\"",
+            "\xFE",
             NULL,               /* bug: rejected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
             "\xFE",
         },
         {
-            "\"\xFF\"",
+            "\xFF",
             NULL,               /* bug: rejected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
             "\xFF",
         },
         {
-            "\"\xFE\xFE\xFF\xFF\"",
+            "\xFE\xFE\xFF\xFF",
             NULL,                 /* bug: rejected */
-            "\"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\"",
+            "\\uFFFD\\uFFFD\\uFFFD\\uFFFD",
             "\xFE\xFE\xFF\xFF",
         },
         /* 4  Overlong sequences */
         /* 4.1  Overlong '/' */
         {
-            "\"\xC0\xAF\"",
+            "\xC0\xAF",
             NULL,               /* bug: rejected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
             "\xC0\xAF",
         },
         {
-            "\"\xE0\x80\xAF\"",
+            "\xE0\x80\xAF",
             "\xE0\x80\xAF",     /* bug: not corrected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
         },
         {
-            "\"\xF0\x80\x80\xAF\"",
+            "\xF0\x80\x80\xAF",
             "\xF0\x80\x80\xAF",  /* bug: not corrected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
         },
         {
-            "\"\xF8\x80\x80\x80\xAF\"",
+            "\xF8\x80\x80\x80\xAF",
             NULL,                        /* bug: rejected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
             "\xF8\x80\x80\x80\xAF",
         },
         {
-            "\"\xFC\x80\x80\x80\x80\xAF\"",
+            "\xFC\x80\x80\x80\x80\xAF",
             NULL,                               /* bug: rejected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
             "\xFC\x80\x80\x80\x80\xAF",
         },
         /*
@@ -589,16 +593,16 @@ static void utf8_string(void)
          */
         {
             /* \U+007F */
-            "\"\xC1\xBF\"",
+            "\xC1\xBF",
             NULL,               /* bug: rejected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
             "\xC1\xBF",
         },
         {
             /* \U+07FF */
-            "\"\xE0\x9F\xBF\"",
+            "\xE0\x9F\xBF",
             "\xE0\x9F\xBF",     /* bug: not corrected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
         },
         {
             /*
@@ -607,181 +611,181 @@ static void utf8_string(void)
              * noncharacter.  Testing U+FFFC seems more useful.  See
              * also 2.2.3
              */
-            "\"\xF0\x8F\xBF\xBC\"",
+            "\xF0\x8F\xBF\xBC",
             "\xF0\x8F\xBF\xBC",   /* bug: not corrected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
         },
         {
             /* \U+1FFFFF */
-            "\"\xF8\x87\xBF\xBF\xBF\"",
+            "\xF8\x87\xBF\xBF\xBF",
             NULL,                        /* bug: rejected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
             "\xF8\x87\xBF\xBF\xBF",
         },
         {
             /* \U+3FFFFFF */
-            "\"\xFC\x83\xBF\xBF\xBF\xBF\"",
+            "\xFC\x83\xBF\xBF\xBF\xBF",
             NULL,                               /* bug: rejected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
             "\xFC\x83\xBF\xBF\xBF\xBF",
         },
         /* 4.3  Overlong representation of the NUL character */
         {
             /* \U+0000 */
-            "\"\xC0\x80\"",
+            "\xC0\x80",
             NULL,               /* bug: rejected */
-            "\"\\u0000\"",
+            "\\u0000",
             "\xC0\x80",
         },
         {
             /* \U+0000 */
-            "\"\xE0\x80\x80\"",
+            "\xE0\x80\x80",
             "\xE0\x80\x80",     /* bug: not corrected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
         },
         {
             /* \U+0000 */
-            "\"\xF0\x80\x80\x80\"",
+            "\xF0\x80\x80\x80",
             "\xF0\x80\x80\x80",   /* bug: not corrected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
         },
         {
             /* \U+0000 */
-            "\"\xF8\x80\x80\x80\x80\"",
+            "\xF8\x80\x80\x80\x80",
             NULL,                        /* bug: rejected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
             "\xF8\x80\x80\x80\x80",
         },
         {
             /* \U+0000 */
-            "\"\xFC\x80\x80\x80\x80\x80\"",
+            "\xFC\x80\x80\x80\x80\x80",
             NULL,                               /* bug: rejected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
             "\xFC\x80\x80\x80\x80\x80",
         },
         /* 5  Illegal code positions */
         /* 5.1  Single UTF-16 surrogates */
         {
             /* \U+D800 */
-            "\"\xED\xA0\x80\"",
+            "\xED\xA0\x80",
             "\xED\xA0\x80",     /* bug: not corrected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
         },
         {
             /* \U+DB7F */
-            "\"\xED\xAD\xBF\"",
+            "\xED\xAD\xBF",
             "\xED\xAD\xBF",     /* bug: not corrected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
         },
         {
             /* \U+DB80 */
-            "\"\xED\xAE\x80\"",
+            "\xED\xAE\x80",
             "\xED\xAE\x80",     /* bug: not corrected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
         },
         {
             /* \U+DBFF */
-            "\"\xED\xAF\xBF\"",
+            "\xED\xAF\xBF",
             "\xED\xAF\xBF",     /* bug: not corrected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
         },
         {
             /* \U+DC00 */
-            "\"\xED\xB0\x80\"",
+            "\xED\xB0\x80",
             "\xED\xB0\x80",     /* bug: not corrected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
         },
         {
             /* \U+DF80 */
-            "\"\xED\xBE\x80\"",
+            "\xED\xBE\x80",
             "\xED\xBE\x80",     /* bug: not corrected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
         },
         {
             /* \U+DFFF */
-            "\"\xED\xBF\xBF\"",
+            "\xED\xBF\xBF",
             "\xED\xBF\xBF",     /* bug: not corrected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
         },
         /* 5.2  Paired UTF-16 surrogates */
         {
             /* \U+D800\U+DC00 */
-            "\"\xED\xA0\x80\xED\xB0\x80\"",
+            "\xED\xA0\x80\xED\xB0\x80",
             "\xED\xA0\x80\xED\xB0\x80", /* bug: not corrected */
-            "\"\\uFFFD\\uFFFD\"",
+            "\\uFFFD\\uFFFD",
         },
         {
             /* \U+D800\U+DFFF */
-            "\"\xED\xA0\x80\xED\xBF\xBF\"",
+            "\xED\xA0\x80\xED\xBF\xBF",
             "\xED\xA0\x80\xED\xBF\xBF", /* bug: not corrected */
-            "\"\\uFFFD\\uFFFD\"",
+            "\\uFFFD\\uFFFD",
         },
         {
             /* \U+DB7F\U+DC00 */
-            "\"\xED\xAD\xBF\xED\xB0\x80\"",
+            "\xED\xAD\xBF\xED\xB0\x80",
             "\xED\xAD\xBF\xED\xB0\x80", /* bug: not corrected */
-            "\"\\uFFFD\\uFFFD\"",
+            "\\uFFFD\\uFFFD",
         },
         {
             /* \U+DB7F\U+DFFF */
-            "\"\xED\xAD\xBF\xED\xBF\xBF\"",
+            "\xED\xAD\xBF\xED\xBF\xBF",
             "\xED\xAD\xBF\xED\xBF\xBF", /* bug: not corrected */
-            "\"\\uFFFD\\uFFFD\"",
+            "\\uFFFD\\uFFFD",
         },
         {
             /* \U+DB80\U+DC00 */
-            "\"\xED\xAE\x80\xED\xB0\x80\"",
+            "\xED\xAE\x80\xED\xB0\x80",
             "\xED\xAE\x80\xED\xB0\x80", /* bug: not corrected */
-            "\"\\uFFFD\\uFFFD\"",
+            "\\uFFFD\\uFFFD",
         },
         {
             /* \U+DB80\U+DFFF */
-            "\"\xED\xAE\x80\xED\xBF\xBF\"",
+            "\xED\xAE\x80\xED\xBF\xBF",
             "\xED\xAE\x80\xED\xBF\xBF", /* bug: not corrected */
-            "\"\\uFFFD\\uFFFD\"",
+            "\\uFFFD\\uFFFD",
         },
         {
             /* \U+DBFF\U+DC00 */
-            "\"\xED\xAF\xBF\xED\xB0\x80\"",
+            "\xED\xAF\xBF\xED\xB0\x80",
             "\xED\xAF\xBF\xED\xB0\x80", /* bug: not corrected */
-            "\"\\uFFFD\\uFFFD\"",
+            "\\uFFFD\\uFFFD",
         },
         {
             /* \U+DBFF\U+DFFF */
-            "\"\xED\xAF\xBF\xED\xBF\xBF\"",
+            "\xED\xAF\xBF\xED\xBF\xBF",
             "\xED\xAF\xBF\xED\xBF\xBF", /* bug: not corrected */
-            "\"\\uFFFD\\uFFFD\"",
+            "\\uFFFD\\uFFFD",
         },
         /* 5.3  Other illegal code positions */
         /* BMP noncharacters */
         {
             /* \U+FFFE */
-            "\"\xEF\xBF\xBE\"",
+            "\xEF\xBF\xBE",
             "\xEF\xBF\xBE",     /* bug: not corrected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
         },
         {
             /* \U+FFFF */
-            "\"\xEF\xBF\xBF\"",
+            "\xEF\xBF\xBF",
             "\xEF\xBF\xBF",     /* bug: not corrected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
         },
         {
             /* U+FDD0 */
-            "\"\xEF\xB7\x90\"",
+            "\xEF\xB7\x90",
             "\xEF\xB7\x90",     /* bug: not corrected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
         },
         {
             /* U+FDEF */
-            "\"\xEF\xB7\xAF\"",
+            "\xEF\xB7\xAF",
             "\xEF\xB7\xAF",     /* bug: not corrected */
-            "\"\\uFFFD\"",
+            "\\uFFFD",
         },
         /* Plane 1 .. 16 noncharacters */
         {
             /* U+1FFFE U+1FFFF U+2FFFE U+2FFFF ... U+10FFFE U+10FFFF */
-            "\"\xF0\x9F\xBF\xBE\xF0\x9F\xBF\xBF"
+            "\xF0\x9F\xBF\xBE\xF0\x9F\xBF\xBF"
             "\xF0\xAF\xBF\xBE\xF0\xAF\xBF\xBF"
             "\xF0\xBF\xBF\xBE\xF0\xBF\xBF\xBF"
             "\xF1\x8F\xBF\xBE\xF1\x8F\xBF\xBF"
@@ -796,7 +800,7 @@ static void utf8_string(void)
             "\xF3\x9F\xBF\xBE\xF3\x9F\xBF\xBF"
             "\xF3\xAF\xBF\xBE\xF3\xAF\xBF\xBF"
             "\xF3\xBF\xBF\xBE\xF3\xBF\xBF\xBF"
-            "\xF4\x8F\xBF\xBE\xF4\x8F\xBF\xBF\"",
+            "\xF4\x8F\xBF\xBE\xF4\x8F\xBF\xBF",
             /* bug: not corrected */
             "\xF0\x9F\xBF\xBE\xF0\x9F\xBF\xBF"
             "\xF0\xAF\xBF\xBE\xF0\xAF\xBF\xBF"
@@ -814,55 +818,52 @@ static void utf8_string(void)
             "\xF3\xAF\xBF\xBE\xF3\xAF\xBF\xBF"
             "\xF3\xBF\xBF\xBE\xF3\xBF\xBF\xBF"
             "\xF4\x8F\xBF\xBE\xF4\x8F\xBF\xBF",
-            "\"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
-            "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\"",
+            "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
+            "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD",
         },
         {}
     };
-    int i;
-    QObject *obj;
+    int i, j;
     QString *str;
     const char *json_in, *utf8_out, *utf8_in, *json_out;
+    char *jstr;
 
     for (i = 0; test_cases[i].json_in; i++) {
-        json_in = test_cases[i].json_in;
-        utf8_out = test_cases[i].utf8_out;
-        utf8_in = test_cases[i].utf8_in ?: test_cases[i].utf8_out;
-        json_out = test_cases[i].json_out ?: test_cases[i].json_in;
-
-        obj = qobject_from_json(json_in, utf8_out ? &error_abort : NULL);
-        if (utf8_out) {
-            str = qobject_to(QString, obj);
-            g_assert(str);
-            g_assert_cmpstr(qstring_get_str(str), ==, utf8_out);
-        } else {
-            g_assert(!obj);
-        }
-        qobject_unref(obj);
+        for (j = 0; j < 2; j++) {
+            json_in = test_cases[i].json_in;
+            utf8_out = test_cases[i].utf8_out;
+            utf8_in = test_cases[i].utf8_in ?: test_cases[i].utf8_out;
+            json_out = test_cases[i].json_out ?: test_cases[i].json_in;
+
+            /* Parse @json_in, expect @utf8_out */
+            if (utf8_out) {
+                str = from_json_str(json_in, j, &error_abort);
+                g_assert_cmpstr(qstring_get_try_str(str), ==, utf8_out);
+                qobject_unref(str);
+            } else {
+                str = from_json_str(json_in, j, NULL);
+                g_assert(!str);
+            }
 
-        obj = QOBJECT(qstring_from_str(utf8_in));
-        str = qobject_to_json(obj);
-        if (json_out) {
-            g_assert(str);
-            g_assert_cmpstr(qstring_get_str(str), ==, json_out);
-        } else {
-            g_assert(!str);
-        }
-        qobject_unref(str);
-        qobject_unref(obj);
+            /* Unparse @utf8_in, expect @json_out */
+            str = qstring_from_str(utf8_in);
+            jstr = to_json_str(str);
+            g_assert_cmpstr(jstr, ==, json_out);
+            qobject_unref(str);
+            g_free(jstr);
 
-        /*
-         * Disabled, because qobject_from_json() is buggy, and I can't
-         * be bothered to add the expected incorrect results.
-         * FIXME Enable once these bugs have been fixed.
-         */
-        if (0 && json_out != json_in) {
-            obj = qobject_from_json(json_out, &error_abort);
-            str = qobject_to(QString, obj);
-            g_assert(str);
-            g_assert_cmpstr(qstring_get_str(str), ==, utf8_out);
+            /*
+             * Parse @json_out right back
+             * Disabled, because qobject_from_json() is buggy, and I can't
+             * be bothered to add the expected incorrect results.
+             * FIXME Enable once these bugs have been fixed.
+             */
+            if (0 && json_out != json_in) {
+                str = from_json_str(json_out, j, &error_abort);
+                g_assert_cmpstr(qstring_get_try_str(str), ==, utf8_out);
+            }
         }
     }
 }
-- 
cgit v1.2.3-55-g7522


From 32846e93047899bdde498d8c6a14d291fe6fa4e2 Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Thu, 23 Aug 2018 18:39:40 +0200
Subject: check-qjson: Simplify utf8_string()

The previous commit made utf8_string()'s test_cases[].utf8_in
superfluous: we can use .json_in instead.  Except for the case testing
U+0000.  \x00 doesn't work in C strings, so it tests \\u0000 instead.
But testing \\uXXXX is escaped_string()'s job.  It's covered there.
Test U+0001 here, and drop .utf8_in.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-14-armbru@redhat.com>
---
 tests/check-qjson.c | 53 +++++++++--------------------------------------------
 1 file changed, 9 insertions(+), 44 deletions(-)

(limited to 'tests/check-qjson.c')

diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index b229bfabac..8d0fe4c8a7 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -181,8 +181,6 @@ static void utf8_string(void)
         const char *utf8_out;
         /* Expected unparse output, defaults to @json_in */
         const char *json_out;
-        /* Expected parse output for @json_out, defaults to @utf8_out */
-        const char *utf8_in;
     } test_cases[] = {
         /*
          * Bug markers used here:
@@ -191,10 +189,6 @@ static void utf8_string(void)
          * - bug: rejected
          *   JSON parser rejects invalid sequence(s)
          *   We may choose to define this as feature
-         * - bug: want "..."
-         *   JSON parser produces incorrect result, this is the
-         *   correct one, assuming replacement character U+FFFF
-         *   We may choose to reject instead of replace
          */
 
         /* 1  Some correct UTF-8 text */
@@ -215,12 +209,15 @@ static void utf8_string(void)
         },
         /* 2  Boundary condition test cases */
         /* 2.1  First possible sequence of a certain length */
-        /* 2.1.1  1 byte U+0000 */
+        /*
+         * 2.1.1  1 byte U+0001
+         * \x00 is impossible, test \x01 instead.  Other
+         * representations of U+0000 are covered under 4.3.
+         */
         {
-            "\\u0000",
-            "",                 /* bug: want overlong "\xC0\x80" */
-            "\\u0000",
-            "\xC0\x80",
+            "\x01",
+            "\x01",
+            "\\u0001",
         },
         /* 2.1.2  2 bytes U+0080 */
         {
@@ -245,14 +242,12 @@ static void utf8_string(void)
             "\xF8\x88\x80\x80\x80",
             NULL,               /* bug: rejected */
             "\\uFFFD",
-            "\xF8\x88\x80\x80\x80",
         },
         /* 2.1.6  6 bytes U+4000000 */
         {
             "\xFC\x84\x80\x80\x80\x80",
             NULL,               /* bug: rejected */
             "\\uFFFD",
-            "\xFC\x84\x80\x80\x80\x80",
         },
         /* 2.2  Last possible sequence of a certain length */
         /* 2.2.1  1 byte U+007F */
@@ -286,21 +281,18 @@ static void utf8_string(void)
             "\xF7\xBF\xBF\xBF",
             NULL,               /* bug: rejected */
             "\\uFFFD",
-            "\xF7\xBF\xBF\xBF",
         },
         /* 2.2.5  5 bytes U+3FFFFFF */
         {
             "\xFB\xBF\xBF\xBF\xBF",
             NULL,               /* bug: rejected */
             "\\uFFFD",
-            "\xFB\xBF\xBF\xBF\xBF",
         },
         /* 2.2.6  6 bytes U+7FFFFFFF */
         {
             "\xFD\xBF\xBF\xBF\xBF\xBF",
             NULL,               /* bug: rejected */
             "\\uFFFD",
-            "\xFD\xBF\xBF\xBF\xBF\xBF",
         },
         /* 2.3  Other boundary conditions */
         {
@@ -423,10 +415,6 @@ static void utf8_string(void)
             "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD "
             "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD "
             "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD ",
-            "\xC0 \xC1 \xC2 \xC3 \xC4 \xC5 \xC6 \xC7 "
-            "\xC8 \xC9 \xCA \xCB \xCC \xCD \xCE \xCF "
-            "\xD0 \xD1 \xD2 \xD3 \xD4 \xD5 \xD6 \xD7 "
-            "\xD8 \xD9 \xDA \xDB \xDC \xDD \xDE \xDF ",
         },
         /* 3.2.2  All 16 first bytes of 3-byte sequences, followed by space */
         {
@@ -443,21 +431,18 @@ static void utf8_string(void)
             "\xF0 \xF1 \xF2 \xF3 \xF4 \xF5 \xF6 \xF7 ",
             NULL,               /* bug: rejected */
             "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD ",
-            "\xF0 \xF1 \xF2 \xF3 \xF4 \xF5 \xF6 \xF7 ",
         },
         /* 3.2.4  All 4 first bytes of 5-byte sequences, followed by space */
         {
             "\xF8 \xF9 \xFA \xFB ",
             NULL,               /* bug: rejected */
             "\\uFFFD \\uFFFD \\uFFFD \\uFFFD ",
-            "\xF8 \xF9 \xFA \xFB ",
         },
         /* 3.2.5  All 2 first bytes of 6-byte sequences, followed by space */
         {
             "\xFC \xFD ",
             NULL,               /* bug: rejected */
             "\\uFFFD \\uFFFD ",
-            "\xFC \xFD ",
         },
         /* 3.3  Sequences with last continuation byte missing */
         /* 3.3.1  2-byte sequence with last byte missing (U+0000) */
@@ -465,7 +450,6 @@ static void utf8_string(void)
             "\xC0",
             NULL,               /* bug: rejected */
             "\\uFFFD",
-            "\xC0",
         },
         /* 3.3.2  3-byte sequence with last byte missing (U+0000) */
         {
@@ -484,14 +468,12 @@ static void utf8_string(void)
             "\xF8\x80\x80\x80",
             NULL,                   /* bug: rejected */
             "\\uFFFD",
-            "\xF8\x80\x80\x80",
         },
         /* 3.3.5  6-byte sequence with last byte missing (U+0000) */
         {
             "\xFC\x80\x80\x80\x80",
             NULL,                        /* bug: rejected */
             "\\uFFFD",
-            "\xFC\x80\x80\x80\x80",
         },
         /* 3.3.6  2-byte sequence with last byte missing (U+07FF) */
         {
@@ -510,21 +492,18 @@ static void utf8_string(void)
             "\xF7\xBF\xBF",
             NULL,               /* bug: rejected */
             "\\uFFFD",
-            "\xF7\xBF\xBF",
         },
         /* 3.3.9  5-byte sequence with last byte missing (U+3FFFFFF) */
         {
             "\xFB\xBF\xBF\xBF",
             NULL,                 /* bug: rejected */
             "\\uFFFD",
-            "\xFB\xBF\xBF\xBF",
         },
         /* 3.3.10  6-byte sequence with last byte missing (U+7FFFFFFF) */
         {
             "\xFD\xBF\xBF\xBF\xBF",
             NULL,                        /* bug: rejected */
             "\\uFFFD",
-            "\xFD\xBF\xBF\xBF\xBF",
         },
         /* 3.4  Concatenation of incomplete sequences */
         {
@@ -533,27 +512,22 @@ static void utf8_string(void)
             NULL,               /* bug: rejected */
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD",
-            "\xC0\xE0\x80\xF0\x80\x80\xF8\x80\x80\x80\xFC\x80\x80\x80\x80"
-            "\xDF\xEF\xBF\xF7\xBF\xBF\xFB\xBF\xBF\xBF\xFD\xBF\xBF\xBF\xBF",
         },
         /* 3.5  Impossible bytes */
         {
             "\xFE",
             NULL,               /* bug: rejected */
             "\\uFFFD",
-            "\xFE",
         },
         {
             "\xFF",
             NULL,               /* bug: rejected */
             "\\uFFFD",
-            "\xFF",
         },
         {
             "\xFE\xFE\xFF\xFF",
             NULL,                 /* bug: rejected */
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD",
-            "\xFE\xFE\xFF\xFF",
         },
         /* 4  Overlong sequences */
         /* 4.1  Overlong '/' */
@@ -561,7 +535,6 @@ static void utf8_string(void)
             "\xC0\xAF",
             NULL,               /* bug: rejected */
             "\\uFFFD",
-            "\xC0\xAF",
         },
         {
             "\xE0\x80\xAF",
@@ -577,13 +550,11 @@ static void utf8_string(void)
             "\xF8\x80\x80\x80\xAF",
             NULL,                        /* bug: rejected */
             "\\uFFFD",
-            "\xF8\x80\x80\x80\xAF",
         },
         {
             "\xFC\x80\x80\x80\x80\xAF",
             NULL,                               /* bug: rejected */
             "\\uFFFD",
-            "\xFC\x80\x80\x80\x80\xAF",
         },
         /*
          * 4.2  Maximum overlong sequences
@@ -596,7 +567,6 @@ static void utf8_string(void)
             "\xC1\xBF",
             NULL,               /* bug: rejected */
             "\\uFFFD",
-            "\xC1\xBF",
         },
         {
             /* \U+07FF */
@@ -620,14 +590,12 @@ static void utf8_string(void)
             "\xF8\x87\xBF\xBF\xBF",
             NULL,                        /* bug: rejected */
             "\\uFFFD",
-            "\xF8\x87\xBF\xBF\xBF",
         },
         {
             /* \U+3FFFFFF */
             "\xFC\x83\xBF\xBF\xBF\xBF",
             NULL,                               /* bug: rejected */
             "\\uFFFD",
-            "\xFC\x83\xBF\xBF\xBF\xBF",
         },
         /* 4.3  Overlong representation of the NUL character */
         {
@@ -635,7 +603,6 @@ static void utf8_string(void)
             "\xC0\x80",
             NULL,               /* bug: rejected */
             "\\u0000",
-            "\xC0\x80",
         },
         {
             /* \U+0000 */
@@ -654,14 +621,12 @@ static void utf8_string(void)
             "\xF8\x80\x80\x80\x80",
             NULL,                        /* bug: rejected */
             "\\uFFFD",
-            "\xF8\x80\x80\x80\x80",
         },
         {
             /* \U+0000 */
             "\xFC\x80\x80\x80\x80\x80",
             NULL,                               /* bug: rejected */
             "\\uFFFD",
-            "\xFC\x80\x80\x80\x80\x80",
         },
         /* 5  Illegal code positions */
         /* 5.1  Single UTF-16 surrogates */
@@ -834,7 +799,7 @@ static void utf8_string(void)
         for (j = 0; j < 2; j++) {
             json_in = test_cases[i].json_in;
             utf8_out = test_cases[i].utf8_out;
-            utf8_in = test_cases[i].utf8_in ?: test_cases[i].utf8_out;
+            utf8_in = test_cases[i].utf8_out ?: test_cases[i].json_in;
             json_out = test_cases[i].json_out ?: test_cases[i].json_in;
 
             /* Parse @json_in, expect @utf8_out */
-- 
cgit v1.2.3-55-g7522


From 5f454e662e710300b4c5414b2d5fd109ee18682b Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Thu, 23 Aug 2018 18:39:41 +0200
Subject: check-qjson: Fix utf8_string() to test all invalid sequences

Some of utf8_string()'s test_cases[] contain multiple invalid
sequences.  Testing that qobject_from_json() fails only shows that we
reject at least one invalid sequence.  That's incomplete.

Additionally test each non-space sequence in isolation.

This demonstrates that the JSON parser accepts invalid sequences
starting with \xC2..\xF4.  Add a FIXME comment.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-15-armbru@redhat.com>
---
 tests/check-qjson.c | 33 ++++++++++++++++++++++++++++-----
 1 file changed, 28 insertions(+), 5 deletions(-)

(limited to 'tests/check-qjson.c')

diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index 8d0fe4c8a7..40a573eb21 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -20,6 +20,7 @@
 #include "qapi/qmp/qnull.h"
 #include "qapi/qmp/qnum.h"
 #include "qapi/qmp/qstring.h"
+#include "qemu/unicode.h"
 #include "qemu-common.h"
 
 static QString *from_json_str(const char *jstr, bool single, Error **errp)
@@ -410,7 +411,7 @@ static void utf8_string(void)
             "\xC8 \xC9 \xCA \xCB \xCC \xCD \xCE \xCF "
             "\xD0 \xD1 \xD2 \xD3 \xD4 \xD5 \xD6 \xD7 "
             "\xD8 \xD9 \xDA \xDB \xDC \xDD \xDE \xDF ",
-            NULL,               /* bug: rejected */
+            NULL,               /* bug: rejected (partly, see FIXME below) */
             "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD "
             "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD "
             "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD "
@@ -429,7 +430,7 @@ static void utf8_string(void)
         /* 3.2.3  All 8 first bytes of 4-byte sequences, followed by space */
         {
             "\xF0 \xF1 \xF2 \xF3 \xF4 \xF5 \xF6 \xF7 ",
-            NULL,               /* bug: rejected */
+            NULL,               /* bug: rejected (partly, see FIXME below) */
             "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD ",
         },
         /* 3.2.4  All 4 first bytes of 5-byte sequences, followed by space */
@@ -509,7 +510,7 @@ static void utf8_string(void)
         {
             "\xC0\xE0\x80\xF0\x80\x80\xF8\x80\x80\x80\xFC\x80\x80\x80\x80"
             "\xDF\xEF\xBF\xF7\xBF\xBF\xFB\xBF\xBF\xBF\xFD\xBF\xBF\xBF\xBF",
-            NULL,               /* bug: rejected */
+            NULL,               /* bug: rejected (partly, see FIXME below) */
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD",
         },
@@ -792,8 +793,8 @@ static void utf8_string(void)
     };
     int i, j;
     QString *str;
-    const char *json_in, *utf8_out, *utf8_in, *json_out;
-    char *jstr;
+    const char *json_in, *utf8_out, *utf8_in, *json_out, *tail;
+    char *end, *in, *jstr;
 
     for (i = 0; test_cases[i].json_in; i++) {
         for (j = 0; j < 2; j++) {
@@ -810,6 +811,28 @@ static void utf8_string(void)
             } else {
                 str = from_json_str(json_in, j, NULL);
                 g_assert(!str);
+                /*
+                 * Failure may be due to any sequence, but *all* sequences
+                 * are expected to fail.  Test each one in isolation.
+                 */
+                for (tail = json_in; *tail; tail = end) {
+                    mod_utf8_codepoint(tail, 6, &end);
+                    if (*end == ' ') {
+                        end++;
+                    }
+                    in = strndup(tail, end - tail);
+                    str = from_json_str(in, j, NULL);
+                    /*
+                     * FIXME JSON parser accepts invalid sequence
+                     * starting with \xC2..\xF4
+                     */
+                    if (*in >= '\xC2' && *in <= '\xF4') {
+                        g_free(str);
+                        str = NULL;
+                    }
+                    g_assert(!str);
+                    g_free(in);
+                }
             }
 
             /* Unparse @utf8_in, expect @json_out */
-- 
cgit v1.2.3-55-g7522


From 6bc93a3401e32441c190c1f53e3967d226d4eb7c Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Thu, 23 Aug 2018 18:39:42 +0200
Subject: check-qjson qmp-test: Cover control characters more thoroughly

RFC 8259 "The JavaScript Object Notation (JSON) Data Interchange
Format" requires control characters in strings to be escaped.
Demonstrate the JSON parser accepts U+0001 .. U+001F unescaped.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-16-armbru@redhat.com>
---
 tests/check-qjson.c | 36 ++++++++++++++++++++++++++++++------
 tests/qmp-test.c    | 14 ++++++++++++++
 2 files changed, 44 insertions(+), 6 deletions(-)

(limited to 'tests/check-qjson.c')

diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index 40a573eb21..a586189d87 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -192,6 +192,26 @@ static void utf8_string(void)
          *   We may choose to define this as feature
          */
 
+        /* 0  Control characters */
+        {
+            /*
+             * Note: \x00 is impossible, other representations of
+             * U+0000 are covered under 4.3
+             */
+            "\x01\x02\x03\x04\x05\x06\x07"
+            "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
+            "\x10\x11\x12\x13\x14\x15\x16\x17"
+            "\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F",
+            /* bug: not corrected (valid UTF-8, but invalid JSON) */
+            "\x01\x02\x03\x04\x05\x06\x07"
+            "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
+            "\x10\x11\x12\x13\x14\x15\x16\x17"
+            "\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F",
+            "\\u0001\\u0002\\u0003\\u0004\\u0005\\u0006\\u0007"
+            "\\b\\t\\n\\u000B\\f\\r\\u000E\\u000F"
+            "\\u0010\\u0011\\u0012\\u0013\\u0014\\u0015\\u0016\\u0017"
+            "\\u0018\\u0019\\u001A\\u001B\\u001C\\u001D\\u001E\\u001F",
+        },
         /* 1  Some correct UTF-8 text */
         {
             /* a bit of German */
@@ -211,14 +231,14 @@ static void utf8_string(void)
         /* 2  Boundary condition test cases */
         /* 2.1  First possible sequence of a certain length */
         /*
-         * 2.1.1  1 byte U+0001
-         * \x00 is impossible, test \x01 instead.  Other
-         * representations of U+0000 are covered under 4.3.
+         * 2.1.1 1 byte U+0020
+         * Control characters are already covered by their own test
+         * case under 0.  Test the first 1 byte non-control character
+         * here.
          */
         {
-            "\x01",
-            "\x01",
-            "\\u0001",
+            " ",
+            " ",
         },
         /* 2.1.2  2 bytes U+0080 */
         {
@@ -1333,6 +1353,10 @@ static void junk_input(void)
     g_assert(!err);             /* BUG */
     g_assert(obj == NULL);
 
+    obj = qobject_from_json("{\x01", &err);
+    g_assert(!err);             /* BUG */
+    g_assert(obj == NULL);
+
     obj = qobject_from_json("[0\xFF]", &err);
     error_free_or_abort(&err);
     g_assert(obj == NULL);
diff --git a/tests/qmp-test.c b/tests/qmp-test.c
index 17153192fe..5edc97f63f 100644
--- a/tests/qmp-test.c
+++ b/tests/qmp-test.c
@@ -71,6 +71,13 @@ static void test_malformed(QTestState *qts)
     qobject_unref(resp);
     g_assert(recovered(qts));
 
+    /* lexical error: funny control character outside string */
+    qtest_qmp_send_raw(qts, "{\x01");
+    resp = qtest_qmp_receive(qts);
+    g_assert_cmpstr(get_error_class(resp), ==, "GenericError");
+    qobject_unref(resp);
+    g_assert(recovered(qts));
+
     /* lexical error: impossible byte in string */
     qtest_qmp_send_raw(qts, "{'bad \xFF");
     resp = qtest_qmp_receive(qts);
@@ -78,6 +85,13 @@ static void test_malformed(QTestState *qts)
     qobject_unref(resp);
     g_assert(recovered(qts));
 
+    /* lexical error: control character in string */
+    qtest_qmp_send_raw(qts, "{'execute': 'nonexistent', 'id':'\n'}");
+    resp = qtest_qmp_receive(qts);
+    g_assert_cmpstr(get_error_class(resp), ==, "CommandNotFound"); /* BUG */
+    qobject_unref(resp);
+    g_assert(recovered(qts));
+
     /* lexical error: interpolation */
     qtest_qmp_send_raw(qts, "%%p\n");
     resp = qtest_qmp_receive(qts);
-- 
cgit v1.2.3-55-g7522


From 2e933f5701c57cc857044fbd818e272059811e48 Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Thu, 23 Aug 2018 18:39:43 +0200
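Subject: check-qjson: Cover interpolation more thoroughly

Replace vararg_string() and vararg_number() by interpolation_valid(),
which additionally covers %ld, %u, %lu, %llu and %p, and move the %i
tests there from keyword_literal().  Add interpolation_unknown() to
check that an unknown conversion such as %x makes the test subprocess
fail, and interpolation_string() to show that % is silently ignored
within strings.  Rename simple_varargs() to simple_interpolation(),
and regroup the test paths accordingly.
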
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-17-armbru@redhat.com>
---
 tests/check-qjson.c | 158 ++++++++++++++++++++++++++++++++--------------------
 1 file changed, 98 insertions(+), 60 deletions(-)

(limited to 'tests/check-qjson.c')

diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index a586189d87..1688b2f5c1 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -876,30 +876,6 @@ static void utf8_string(void)
     }
 }
 
-static void vararg_string(void)
-{
-    int i;
-    struct {
-        const char *decoded;
-    } test_cases[] = {
-        { "hello world" },
-        { "the quick brown fox jumped over the fence" },
-        {}
-    };
-
-    for (i = 0; test_cases[i].decoded; i++) {
-        QString *str;
-
-        str = qobject_to(QString,
-                         qobject_from_jsonf_nofail("%s",
-                                                   test_cases[i].decoded));
-        g_assert(str);
-        g_assert(strcmp(qstring_get_str(str), test_cases[i].decoded) == 0);
-
-        qobject_unref(str);
-    }
-}
-
 static void simple_number(void)
 {
     int i;
@@ -1017,29 +993,6 @@ static void float_number(void)
     }
 }
 
-static void vararg_number(void)
-{
-    QNum *qnum;
-    int value = 0x2342;
-    long long value_ll = 0x2342342343LL;
-    double valuef = 2.323423423;
-    int64_t val;
-
-    qnum = qobject_to(QNum, qobject_from_jsonf_nofail("%d", value));
-    g_assert(qnum_get_try_int(qnum, &val));
-    g_assert_cmpint(val, ==, value);
-    qobject_unref(qnum);
-
-    qnum = qobject_to(QNum, qobject_from_jsonf_nofail("%lld", value_ll));
-    g_assert(qnum_get_try_int(qnum, &val));
-    g_assert_cmpint(val, ==, value_ll);
-    qobject_unref(qnum);
-
-    qnum = qobject_to(QNum, qobject_from_jsonf_nofail("%f", valuef));
-    g_assert(qnum_get_double(qnum) == valuef);
-    qobject_unref(qnum);
-}
-
 static void keyword_literal(void)
 {
     QObject *obj;
@@ -1069,6 +1022,35 @@ static void keyword_literal(void)
 
     qobject_unref(qbool);
 
+    obj = qobject_from_json("null", &error_abort);
+    g_assert(obj != NULL);
+    g_assert(qobject_type(obj) == QTYPE_QNULL);
+
+    null = qnull();
+    g_assert(QOBJECT(null) == obj);
+
+    qobject_unref(obj);
+    qobject_unref(null);
+}
+
+static void interpolation_valid(void)
+{
+    long long value_lld = 0x123456789abcdefLL;
+    long value_ld = (long)value_lld;
+    int value_d = (int)value_lld;
+    unsigned long long value_llu = 0xfedcba9876543210ULL;
+    unsigned long value_lu = (unsigned long)value_llu;
+    unsigned value_u = (unsigned)value_llu;
+    double value_f = 2.323423423;
+    const char *value_s = "hello world";
+    QObject *value_p = QOBJECT(qnull());
+    QBool *qbool;
+    QNum *qnum;
+    QString *qstr;
+    QObject *qobj;
+
+    /* bool */
+
     qbool = qobject_to(QBool, qobject_from_jsonf_nofail("%i", false));
     g_assert(qbool);
     g_assert(qbool_get_bool(qbool) == false);
@@ -1080,15 +1062,70 @@ static void keyword_literal(void)
     g_assert(qbool_get_bool(qbool) == true);
     qobject_unref(qbool);
 
-    obj = qobject_from_json("null", &error_abort);
-    g_assert(obj != NULL);
-    g_assert(qobject_type(obj) == QTYPE_QNULL);
+    /* number */
 
-    null = qnull();
-    g_assert(QOBJECT(null) == obj);
+    qnum = qobject_to(QNum, qobject_from_jsonf_nofail("%d", value_d));
+    g_assert_cmpint(qnum_get_int(qnum), ==, value_d);
+    qobject_unref(qnum);
 
-    qobject_unref(obj);
-    qobject_unref(null);
+    qnum = qobject_to(QNum, qobject_from_jsonf_nofail("%ld", value_ld));
+    g_assert_cmpint(qnum_get_int(qnum), ==, value_ld);
+    qobject_unref(qnum);
+
+    qnum = qobject_to(QNum, qobject_from_jsonf_nofail("%lld", value_lld));
+    g_assert_cmpint(qnum_get_int(qnum), ==, value_lld);
+    qobject_unref(qnum);
+
+    qnum = qobject_to(QNum, qobject_from_jsonf_nofail("%u", value_u));
+    g_assert_cmpuint(qnum_get_uint(qnum), ==, value_u);
+    qobject_unref(qnum);
+
+    qnum = qobject_to(QNum, qobject_from_jsonf_nofail("%lu", value_lu));
+    g_assert_cmpuint(qnum_get_uint(qnum), ==, value_lu);
+    qobject_unref(qnum);
+
+    qnum = qobject_to(QNum, qobject_from_jsonf_nofail("%llu", value_llu));
+    g_assert_cmpuint(qnum_get_uint(qnum), ==, value_llu);
+    qobject_unref(qnum);
+
+    qnum = qobject_to(QNum, qobject_from_jsonf_nofail("%f", value_f));
+    g_assert(qnum_get_double(qnum) == value_f);
+    qobject_unref(qnum);
+
+    /* string */
+
+    qstr = qobject_to(QString,
+                     qobject_from_jsonf_nofail("%s", value_s));
+    g_assert_cmpstr(qstring_get_try_str(qstr), ==, value_s);
+    qobject_unref(qstr);
+
+    /* object */
+
+    qobj = qobject_from_jsonf_nofail("%p", value_p);
+    g_assert(qobj == value_p);
+}
+
+static void interpolation_unknown(void)
+{
+    if (g_test_subprocess()) {
+        qobject_from_jsonf_nofail("%x", 666);
+    }
+    g_test_trap_subprocess(NULL, 0, 0);
+    g_test_trap_assert_failed();
+}
+
+static void interpolation_string(void)
+{
+    QLitObject decoded = QLIT_QLIST(((QLitObject[]){
+            QLIT_QSTR("%s"),
+            QLIT_QSTR("eins"),
+            {}}));
+    QObject *qobj;
+
+    /* Dangerous misfeature: % is silently ignored in strings */
+    qobj = qobject_from_jsonf_nofail("['%s', %s]", "eins", "zwei");
+    g_assert(qlit_equal_qobject(&decoded, qobj));
+    qobject_unref(qobj);
 }
 
 static void simple_dict(void)
@@ -1309,7 +1346,7 @@ static void simple_whitespace(void)
     }
 }
 
-static void simple_varargs(void)
+static void simple_interpolation(void)
 {
     QObject *embedded_obj;
     QObject *obj;
@@ -1506,22 +1543,23 @@ int main(int argc, char **argv)
     g_test_add_func("/literals/string/escaped", escaped_string);
     g_test_add_func("/literals/string/quotes", string_with_quotes);
     g_test_add_func("/literals/string/utf8", utf8_string);
-    g_test_add_func("/literals/string/vararg", vararg_string);
 
     g_test_add_func("/literals/number/simple", simple_number);
     g_test_add_func("/literals/number/large", large_number);
     g_test_add_func("/literals/number/float", float_number);
-    g_test_add_func("/literals/number/vararg", vararg_number);
 
     g_test_add_func("/literals/keyword", keyword_literal);
 
+    g_test_add_func("/literals/interpolation/valid", interpolation_valid);
+    g_test_add_func("/literals/interpolation/unkown", interpolation_unknown);
+    g_test_add_func("/literals/interpolation/string", interpolation_string);
+
     g_test_add_func("/dicts/simple_dict", simple_dict);
     g_test_add_func("/dicts/large_dict", large_dict);
     g_test_add_func("/lists/simple_list", simple_list);
 
-    g_test_add_func("/whitespace/simple_whitespace", simple_whitespace);
-
-    g_test_add_func("/varargs/simple_varargs", simple_varargs);
+    g_test_add_func("/mixed/simple_whitespace", simple_whitespace);
+    g_test_add_func("/mixed/interpolation", simple_interpolation);
 
     g_test_add_func("/errors/empty", empty_input);
     g_test_add_func("/errors/blank", blank_input);
-- 
cgit v1.2.3-55-g7522


From 340db1ed82f8ced40a3e778c08963005369e2926 Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Thu, 23 Aug 2018 18:39:45 +0200
Subject: json: Reject unescaped control characters

Fix the lexer to reject unescaped control characters in JSON strings,
in accordance with RFC 8259 "The JavaScript Object Notation (JSON)
Data Interchange Format".

Bonus: we now recover more nicely from unclosed strings.  E.g.

    {"one: 1}\n{"two": 2}

now recovers cleanly after the newline, where before the lexer
remained confused until the next unpaired double quote or lexical
error.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-19-armbru@redhat.com>
---
 qobject/json-lexer.c | 4 ++--
 tests/check-qjson.c  | 6 +-----
 tests/qmp-test.c     | 4 ++--
 3 files changed, 5 insertions(+), 9 deletions(-)

(limited to 'tests/check-qjson.c')

diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c
index 7c0875d225..e85e9a78ff 100644
--- a/qobject/json-lexer.c
+++ b/qobject/json-lexer.c
@@ -115,7 +115,7 @@ static const uint8_t json_lexer[][256] =  {
         ['u'] = IN_DQ_UCODE0,
     },
     [IN_DQ_STRING] = {
-        [1 ... 0xBF] = IN_DQ_STRING,
+        [0x20 ... 0xBF] = IN_DQ_STRING,
         [0xC2 ... 0xF4] = IN_DQ_STRING,
         ['\\'] = IN_DQ_STRING_ESCAPE,
         ['"'] = JSON_STRING,
@@ -155,7 +155,7 @@ static const uint8_t json_lexer[][256] =  {
         ['u'] = IN_SQ_UCODE0,
     },
     [IN_SQ_STRING] = {
-        [1 ... 0xBF] = IN_SQ_STRING,
+        [0x20 ... 0xBF] = IN_SQ_STRING,
         [0xC2 ... 0xF4] = IN_SQ_STRING,
         ['\\'] = IN_SQ_STRING_ESCAPE,
         ['\''] = JSON_STRING,
diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index 1688b2f5c1..f1405ad47a 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -202,11 +202,7 @@ static void utf8_string(void)
             "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
             "\x10\x11\x12\x13\x14\x15\x16\x17"
             "\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F",
-            /* bug: not corrected (valid UTF-8, but invalid JSON) */
-            "\x01\x02\x03\x04\x05\x06\x07"
-            "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
-            "\x10\x11\x12\x13\x14\x15\x16\x17"
-            "\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F",
+            NULL,
             "\\u0001\\u0002\\u0003\\u0004\\u0005\\u0006\\u0007"
             "\\b\\t\\n\\u000B\\f\\r\\u000E\\u000F"
             "\\u0010\\u0011\\u0012\\u0013\\u0014\\u0015\\u0016\\u0017"
diff --git a/tests/qmp-test.c b/tests/qmp-test.c
index 5edc97f63f..7b3ba17c4a 100644
--- a/tests/qmp-test.c
+++ b/tests/qmp-test.c
@@ -86,9 +86,9 @@ static void test_malformed(QTestState *qts)
     g_assert(recovered(qts));
 
     /* lexical error: control character in string */
-    qtest_qmp_send_raw(qts, "{'execute': 'nonexistent', 'id':'\n'}");
+    qtest_qmp_send_raw(qts, "{'execute': 'nonexistent', 'id':'\n");
     resp = qtest_qmp_receive(qts);
-    g_assert_cmpstr(get_error_class(resp), ==, "CommandNotFound"); /* BUG */
+    g_assert_cmpstr(get_error_class(resp), ==, "GenericError");
     qobject_unref(resp);
     g_assert(recovered(qts));
 
-- 
cgit v1.2.3-55-g7522


From a89d3104a29c400dfed4b675d6385a17223f9e0f Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Thu, 23 Aug 2018 18:39:48 +0200
Subject: check-qjson: Document we expect invalid UTF-8 to be rejected

The JSON parser rejects some invalid sequences, but accepts others
without correcting the problem.

We should either reject all invalid sequences, or minimize overlong
sequences and replace all other invalid sequences by a suitable
replacement character.  A common choice for replacement is U+FFFD.

I'm going to implement the former.  Update the comments in
utf8_string() to expect this.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-22-armbru@redhat.com>
---
 tests/check-qjson.c | 151 ++++++++++++++++++++++++----------------------------
 1 file changed, 71 insertions(+), 80 deletions(-)

(limited to 'tests/check-qjson.c')

diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index f1405ad47a..69f5a187c9 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -157,13 +157,7 @@ static void utf8_string(void)
      * They're all marked "bug:" below, and are to be replaced by
      * correct ones as the bugs get fixed.
      *
-     * The JSON parser rejects some invalid sequences, but accepts
-     * others without correcting the problem.
-     *
-     * We should either reject all invalid sequences, or minimize
-     * overlong sequences and replace all other invalid sequences by a
-     * suitable replacement character.  A common choice for
-     * replacement is U+FFFD.
+     * The JSON parser rejects some, but not all invalid sequences.
      *
      * Problem: we can't easily deal with embedded U+0000.  Parsing
      * the JSON string "this \\u0000" is fun" yields "this \0 is fun",
@@ -185,11 +179,8 @@ static void utf8_string(void)
     } test_cases[] = {
         /*
          * Bug markers used here:
-         * - bug: not corrected
-         *   JSON parser fails to correct invalid sequence(s)
-         * - bug: rejected
-         *   JSON parser rejects invalid sequence(s)
-         *   We may choose to define this as feature
+         * - bug: not rejected
+         *   JSON parser fails to reject invalid sequence(s)
          */
 
         /* 0  Control characters */
@@ -257,13 +248,13 @@ static void utf8_string(void)
         /* 2.1.5  5 bytes U+200000 */
         {
             "\xF8\x88\x80\x80\x80",
-            NULL,               /* bug: rejected */
+            NULL,
             "\\uFFFD",
         },
         /* 2.1.6  6 bytes U+4000000 */
         {
             "\xFC\x84\x80\x80\x80\x80",
-            NULL,               /* bug: rejected */
+            NULL,
             "\\uFFFD",
         },
         /* 2.2  Last possible sequence of a certain length */
@@ -296,19 +287,19 @@ static void utf8_string(void)
         /* 2.2.4  4 bytes U+1FFFFF */
         {
             "\xF7\xBF\xBF\xBF",
-            NULL,               /* bug: rejected */
+            NULL,
             "\\uFFFD",
         },
         /* 2.2.5  5 bytes U+3FFFFFF */
         {
             "\xFB\xBF\xBF\xBF\xBF",
-            NULL,               /* bug: rejected */
+            NULL,
             "\\uFFFD",
         },
         /* 2.2.6  6 bytes U+7FFFFFFF */
         {
             "\xFD\xBF\xBF\xBF\xBF\xBF",
-            NULL,               /* bug: rejected */
+            NULL,
             "\\uFFFD",
         },
         /* 2.3  Other boundary conditions */
@@ -347,49 +338,49 @@ static void utf8_string(void)
         /* 3.1.1  First continuation byte */
         {
             "\x80",
-            "\x80",             /* bug: not corrected */
+            "\x80",             /* bug: not rejected */
             "\\uFFFD",
         },
         /* 3.1.2  Last continuation byte */
         {
             "\xBF",
-            "\xBF",             /* bug: not corrected */
+            "\xBF",             /* bug: not rejected */
             "\\uFFFD",
         },
         /* 3.1.3  2 continuation bytes */
         {
             "\x80\xBF",
-            "\x80\xBF",         /* bug: not corrected */
+            "\x80\xBF",         /* bug: not rejected */
             "\\uFFFD\\uFFFD",
         },
         /* 3.1.4  3 continuation bytes */
         {
             "\x80\xBF\x80",
-            "\x80\xBF\x80",     /* bug: not corrected */
+            "\x80\xBF\x80",     /* bug: not rejected */
             "\\uFFFD\\uFFFD\\uFFFD",
         },
         /* 3.1.5  4 continuation bytes */
         {
             "\x80\xBF\x80\xBF",
-            "\x80\xBF\x80\xBF", /* bug: not corrected */
+            "\x80\xBF\x80\xBF", /* bug: not rejected */
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD",
         },
         /* 3.1.6  5 continuation bytes */
         {
             "\x80\xBF\x80\xBF\x80",
-            "\x80\xBF\x80\xBF\x80", /* bug: not corrected */
+            "\x80\xBF\x80\xBF\x80", /* bug: not rejected */
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD",
         },
         /* 3.1.7  6 continuation bytes */
         {
             "\x80\xBF\x80\xBF\x80\xBF",
-            "\x80\xBF\x80\xBF\x80\xBF", /* bug: not corrected */
+            "\x80\xBF\x80\xBF\x80\xBF", /* bug: not rejected */
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD",
         },
         /* 3.1.8  7 continuation bytes */
         {
             "\x80\xBF\x80\xBF\x80\xBF\x80",
-            "\x80\xBF\x80\xBF\x80\xBF\x80", /* bug: not corrected */
+            "\x80\xBF\x80\xBF\x80\xBF\x80", /* bug: not rejected */
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD",
         },
         /* 3.1.9  Sequence of all 64 possible continuation bytes */
@@ -402,7 +393,7 @@ static void utf8_string(void)
             "\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF"
             "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7"
             "\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF",
-             /* bug: not corrected */
+             /* bug: not rejected */
             "\x80\x81\x82\x83\x84\x85\x86\x87"
             "\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F"
             "\x90\x91\x92\x93\x94\x95\x96\x97"
@@ -427,7 +418,7 @@ static void utf8_string(void)
             "\xC8 \xC9 \xCA \xCB \xCC \xCD \xCE \xCF "
             "\xD0 \xD1 \xD2 \xD3 \xD4 \xD5 \xD6 \xD7 "
             "\xD8 \xD9 \xDA \xDB \xDC \xDD \xDE \xDF ",
-            NULL,               /* bug: rejected (partly, see FIXME below) */
+            NULL,               /* bug: accepted partly, see FIXME below */
             "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD "
             "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD "
             "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD "
@@ -437,7 +428,7 @@ static void utf8_string(void)
         {
             "\xE0 \xE1 \xE2 \xE3 \xE4 \xE5 \xE6 \xE7 "
             "\xE8 \xE9 \xEA \xEB \xEC \xED \xEE \xEF ",
-            /* bug: not corrected */
+            /* bug: not rejected */
             "\xE0 \xE1 \xE2 \xE3 \xE4 \xE5 \xE6 \xE7 "
             "\xE8 \xE9 \xEA \xEB \xEC \xED \xEE \xEF ",
             "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD "
@@ -446,131 +437,131 @@ static void utf8_string(void)
         /* 3.2.3  All 8 first bytes of 4-byte sequences, followed by space */
         {
             "\xF0 \xF1 \xF2 \xF3 \xF4 \xF5 \xF6 \xF7 ",
-            NULL,               /* bug: rejected (partly, see FIXME below) */
+            NULL,               /* bug: accepted partly, see FIXME below */
             "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD ",
         },
         /* 3.2.4  All 4 first bytes of 5-byte sequences, followed by space */
         {
             "\xF8 \xF9 \xFA \xFB ",
-            NULL,               /* bug: rejected */
+            NULL,
             "\\uFFFD \\uFFFD \\uFFFD \\uFFFD ",
         },
         /* 3.2.5  All 2 first bytes of 6-byte sequences, followed by space */
         {
             "\xFC \xFD ",
-            NULL,               /* bug: rejected */
+            NULL,
             "\\uFFFD \\uFFFD ",
         },
         /* 3.3  Sequences with last continuation byte missing */
         /* 3.3.1  2-byte sequence with last byte missing (U+0000) */
         {
             "\xC0",
-            NULL,               /* bug: rejected */
+            NULL,
             "\\uFFFD",
         },
         /* 3.3.2  3-byte sequence with last byte missing (U+0000) */
         {
             "\xE0\x80",
-            "\xE0\x80",           /* bug: not corrected */
+            "\xE0\x80",         /* bug: not rejected */
             "\\uFFFD",
         },
         /* 3.3.3  4-byte sequence with last byte missing (U+0000) */
         {
             "\xF0\x80\x80",
-            "\xF0\x80\x80",     /* bug: not corrected */
+            "\xF0\x80\x80",     /* bug: not rejected */
             "\\uFFFD",
         },
         /* 3.3.4  5-byte sequence with last byte missing (U+0000) */
         {
             "\xF8\x80\x80\x80",
-            NULL,                   /* bug: rejected */
+            NULL,
             "\\uFFFD",
         },
         /* 3.3.5  6-byte sequence with last byte missing (U+0000) */
         {
             "\xFC\x80\x80\x80\x80",
-            NULL,                        /* bug: rejected */
+            NULL,
             "\\uFFFD",
         },
         /* 3.3.6  2-byte sequence with last byte missing (U+07FF) */
         {
             "\xDF",
-            "\xDF",             /* bug: not corrected */
+            "\xDF",             /* bug: not rejected */
             "\\uFFFD",
         },
         /* 3.3.7  3-byte sequence with last byte missing (U+FFFF) */
         {
             "\xEF\xBF",
-            "\xEF\xBF",           /* bug: not corrected */
+            "\xEF\xBF",         /* bug: not rejected */
             "\\uFFFD",
         },
         /* 3.3.8  4-byte sequence with last byte missing (U+1FFFFF) */
         {
             "\xF7\xBF\xBF",
-            NULL,               /* bug: rejected */
+            NULL,
             "\\uFFFD",
         },
         /* 3.3.9  5-byte sequence with last byte missing (U+3FFFFFF) */
         {
             "\xFB\xBF\xBF\xBF",
-            NULL,                 /* bug: rejected */
+            NULL,
             "\\uFFFD",
         },
         /* 3.3.10  6-byte sequence with last byte missing (U+7FFFFFFF) */
         {
             "\xFD\xBF\xBF\xBF\xBF",
-            NULL,                        /* bug: rejected */
+            NULL,
             "\\uFFFD",
         },
         /* 3.4  Concatenation of incomplete sequences */
         {
             "\xC0\xE0\x80\xF0\x80\x80\xF8\x80\x80\x80\xFC\x80\x80\x80\x80"
             "\xDF\xEF\xBF\xF7\xBF\xBF\xFB\xBF\xBF\xBF\xFD\xBF\xBF\xBF\xBF",
-            NULL,               /* bug: rejected (partly, see FIXME below) */
+            NULL,               /* bug: accepted partly, see FIXME below */
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD",
         },
         /* 3.5  Impossible bytes */
         {
             "\xFE",
-            NULL,               /* bug: rejected */
+            NULL,
             "\\uFFFD",
         },
         {
             "\xFF",
-            NULL,               /* bug: rejected */
+            NULL,
             "\\uFFFD",
         },
         {
             "\xFE\xFE\xFF\xFF",
-            NULL,                 /* bug: rejected */
+            NULL,
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD",
         },
         /* 4  Overlong sequences */
         /* 4.1  Overlong '/' */
         {
             "\xC0\xAF",
-            NULL,               /* bug: rejected */
+            NULL,
             "\\uFFFD",
         },
         {
             "\xE0\x80\xAF",
-            "\xE0\x80\xAF",     /* bug: not corrected */
+            "\xE0\x80\xAF",     /* bug: not rejected */
             "\\uFFFD",
         },
         {
             "\xF0\x80\x80\xAF",
-            "\xF0\x80\x80\xAF",  /* bug: not corrected */
+            "\xF0\x80\x80\xAF", /* bug: not rejected */
             "\\uFFFD",
         },
         {
             "\xF8\x80\x80\x80\xAF",
-            NULL,                        /* bug: rejected */
+            NULL,
             "\\uFFFD",
         },
         {
             "\xFC\x80\x80\x80\x80\xAF",
-            NULL,                               /* bug: rejected */
+            NULL,
             "\\uFFFD",
         },
         /*
@@ -582,13 +573,13 @@ static void utf8_string(void)
         {
             /* \U+007F */
             "\xC1\xBF",
-            NULL,               /* bug: rejected */
+            NULL,
             "\\uFFFD",
         },
         {
             /* \U+07FF */
             "\xE0\x9F\xBF",
-            "\xE0\x9F\xBF",     /* bug: not corrected */
+            "\xE0\x9F\xBF",     /* bug: not rejected */
             "\\uFFFD",
         },
         {
@@ -599,50 +590,50 @@ static void utf8_string(void)
              * also 2.2.3
              */
             "\xF0\x8F\xBF\xBC",
-            "\xF0\x8F\xBF\xBC",   /* bug: not corrected */
+            "\xF0\x8F\xBF\xBC", /* bug: not rejected */
             "\\uFFFD",
         },
         {
             /* \U+1FFFFF */
             "\xF8\x87\xBF\xBF\xBF",
-            NULL,                        /* bug: rejected */
+            NULL,
             "\\uFFFD",
         },
         {
             /* \U+3FFFFFF */
             "\xFC\x83\xBF\xBF\xBF\xBF",
-            NULL,                               /* bug: rejected */
+            NULL,
             "\\uFFFD",
         },
         /* 4.3  Overlong representation of the NUL character */
         {
             /* \U+0000 */
             "\xC0\x80",
-            NULL,               /* bug: rejected */
+            NULL,
             "\\u0000",
         },
         {
             /* \U+0000 */
             "\xE0\x80\x80",
-            "\xE0\x80\x80",     /* bug: not corrected */
+            "\xE0\x80\x80",     /* bug: not rejected */
             "\\uFFFD",
         },
         {
             /* \U+0000 */
             "\xF0\x80\x80\x80",
-            "\xF0\x80\x80\x80",   /* bug: not corrected */
+            "\xF0\x80\x80\x80", /* bug: not rejected */
             "\\uFFFD",
         },
         {
             /* \U+0000 */
             "\xF8\x80\x80\x80\x80",
-            NULL,                        /* bug: rejected */
+            NULL,
             "\\uFFFD",
         },
         {
             /* \U+0000 */
             "\xFC\x80\x80\x80\x80\x80",
-            NULL,                               /* bug: rejected */
+            NULL,
             "\\uFFFD",
         },
         /* 5  Illegal code positions */
@@ -650,92 +641,92 @@ static void utf8_string(void)
         {
             /* \U+D800 */
             "\xED\xA0\x80",
-            "\xED\xA0\x80",     /* bug: not corrected */
+            "\xED\xA0\x80",     /* bug: not rejected */
             "\\uFFFD",
         },
         {
             /* \U+DB7F */
             "\xED\xAD\xBF",
-            "\xED\xAD\xBF",     /* bug: not corrected */
+            "\xED\xAD\xBF",     /* bug: not rejected */
             "\\uFFFD",
         },
         {
             /* \U+DB80 */
             "\xED\xAE\x80",
-            "\xED\xAE\x80",     /* bug: not corrected */
+            "\xED\xAE\x80",     /* bug: not rejected */
             "\\uFFFD",
         },
         {
             /* \U+DBFF */
             "\xED\xAF\xBF",
-            "\xED\xAF\xBF",     /* bug: not corrected */
+            "\xED\xAF\xBF",     /* bug: not rejected */
             "\\uFFFD",
         },
         {
             /* \U+DC00 */
             "\xED\xB0\x80",
-            "\xED\xB0\x80",     /* bug: not corrected */
+            "\xED\xB0\x80",     /* bug: not rejected */
             "\\uFFFD",
         },
         {
             /* \U+DF80 */
             "\xED\xBE\x80",
-            "\xED\xBE\x80",     /* bug: not corrected */
+            "\xED\xBE\x80",     /* bug: not rejected */
             "\\uFFFD",
         },
         {
             /* \U+DFFF */
             "\xED\xBF\xBF",
-            "\xED\xBF\xBF",     /* bug: not corrected */
+            "\xED\xBF\xBF",     /* bug: not rejected */
             "\\uFFFD",
         },
         /* 5.2  Paired UTF-16 surrogates */
         {
             /* \U+D800\U+DC00 */
             "\xED\xA0\x80\xED\xB0\x80",
-            "\xED\xA0\x80\xED\xB0\x80", /* bug: not corrected */
+            "\xED\xA0\x80\xED\xB0\x80", /* bug: not rejected */
             "\\uFFFD\\uFFFD",
         },
         {
             /* \U+D800\U+DFFF */
             "\xED\xA0\x80\xED\xBF\xBF",
-            "\xED\xA0\x80\xED\xBF\xBF", /* bug: not corrected */
+            "\xED\xA0\x80\xED\xBF\xBF", /* bug: not rejected */
             "\\uFFFD\\uFFFD",
         },
         {
             /* \U+DB7F\U+DC00 */
             "\xED\xAD\xBF\xED\xB0\x80",
-            "\xED\xAD\xBF\xED\xB0\x80", /* bug: not corrected */
+            "\xED\xAD\xBF\xED\xB0\x80", /* bug: not rejected */
             "\\uFFFD\\uFFFD",
         },
         {
             /* \U+DB7F\U+DFFF */
             "\xED\xAD\xBF\xED\xBF\xBF",
-            "\xED\xAD\xBF\xED\xBF\xBF", /* bug: not corrected */
+            "\xED\xAD\xBF\xED\xBF\xBF", /* bug: not rejected */
             "\\uFFFD\\uFFFD",
         },
         {
             /* \U+DB80\U+DC00 */
             "\xED\xAE\x80\xED\xB0\x80",
-            "\xED\xAE\x80\xED\xB0\x80", /* bug: not corrected */
+            "\xED\xAE\x80\xED\xB0\x80", /* bug: not rejected */
             "\\uFFFD\\uFFFD",
         },
         {
             /* \U+DB80\U+DFFF */
             "\xED\xAE\x80\xED\xBF\xBF",
-            "\xED\xAE\x80\xED\xBF\xBF", /* bug: not corrected */
+            "\xED\xAE\x80\xED\xBF\xBF", /* bug: not rejected */
             "\\uFFFD\\uFFFD",
         },
         {
             /* \U+DBFF\U+DC00 */
             "\xED\xAF\xBF\xED\xB0\x80",
-            "\xED\xAF\xBF\xED\xB0\x80", /* bug: not corrected */
+            "\xED\xAF\xBF\xED\xB0\x80", /* bug: not rejected */
             "\\uFFFD\\uFFFD",
         },
         {
             /* \U+DBFF\U+DFFF */
             "\xED\xAF\xBF\xED\xBF\xBF",
-            "\xED\xAF\xBF\xED\xBF\xBF", /* bug: not corrected */
+            "\xED\xAF\xBF\xED\xBF\xBF", /* bug: not rejected */
             "\\uFFFD\\uFFFD",
         },
         /* 5.3  Other illegal code positions */
@@ -743,25 +734,25 @@ static void utf8_string(void)
         {
             /* \U+FFFE */
             "\xEF\xBF\xBE",
-            "\xEF\xBF\xBE",     /* bug: not corrected */
+            "\xEF\xBF\xBE",     /* bug: not rejected */
             "\\uFFFD",
         },
         {
             /* \U+FFFF */
             "\xEF\xBF\xBF",
-            "\xEF\xBF\xBF",     /* bug: not corrected */
+            "\xEF\xBF\xBF",     /* bug: not rejected */
             "\\uFFFD",
         },
         {
             /* U+FDD0 */
             "\xEF\xB7\x90",
-            "\xEF\xB7\x90",     /* bug: not corrected */
+            "\xEF\xB7\x90",     /* bug: not rejected */
             "\\uFFFD",
         },
         {
             /* U+FDEF */
             "\xEF\xB7\xAF",
-            "\xEF\xB7\xAF",     /* bug: not corrected */
+            "\xEF\xB7\xAF",     /* bug: not rejected */
             "\\uFFFD",
         },
         /* Plane 1 .. 16 noncharacters */
@@ -783,7 +774,7 @@ static void utf8_string(void)
             "\xF3\xAF\xBF\xBE\xF3\xAF\xBF\xBF"
             "\xF3\xBF\xBF\xBE\xF3\xBF\xBF\xBF"
             "\xF4\x8F\xBF\xBE\xF4\x8F\xBF\xBF",
-            /* bug: not corrected */
+            /* bug: not rejected */
             "\xF0\x9F\xBF\xBE\xF0\x9F\xBF\xBF"
             "\xF0\xAF\xBF\xBE\xF0\xAF\xBF\xBF"
             "\xF0\xBF\xBF\xBE\xF0\xBF\xBF\xBF"
-- 
cgit v1.2.3-55-g7522


From e59f39d40397645477b959255aedfa17a7c9c779 Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Thu, 23 Aug 2018 18:39:49 +0200
Subject: json: Reject invalid UTF-8 sequences

We reject bytes that can't occur in valid UTF-8 (\xC0..\xC1,
\xF5..\xFF) in the lexer.  That's insufficient; there's plenty of
invalid UTF-8 not containing these bytes, as demonstrated by
check-qjson:

* Malformed sequences

  - Unexpected continuation bytes

  - Missing continuation bytes after start bytes other than
    \xC0..\xC1, \xF5..\xFD.

* Overlong sequences with start bytes other than \xC0..\xC1,
  \xF5..\xFD.

* Invalid code points

Fixing this in the lexer would be bothersome.  Fixing it in the parser
is straightforward, so do that.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-23-armbru@redhat.com>
---
 include/qemu/unicode.h |   1 +
 qobject/json-parser.c  |  20 +++++---
 tests/check-qjson.c    | 137 ++++++++++++++++---------------------------------
 util/unicode.c         |  69 ++++++++++++++++++++++---
 4 files changed, 122 insertions(+), 105 deletions(-)

(limited to 'tests/check-qjson.c')

diff --git a/include/qemu/unicode.h b/include/qemu/unicode.h
index 71c72db461..7fa10b8e60 100644
--- a/include/qemu/unicode.h
+++ b/include/qemu/unicode.h
@@ -2,5 +2,6 @@
 #define QEMU_UNICODE_H
 
 int mod_utf8_codepoint(const char *s, size_t n, char **end);
+ssize_t mod_utf8_encode(char buf[], size_t bufsz, int codepoint);
 
 #endif
diff --git a/qobject/json-parser.c b/qobject/json-parser.c
index 164b86769b..0e232ff101 100644
--- a/qobject/json-parser.c
+++ b/qobject/json-parser.c
@@ -13,6 +13,7 @@
 
 #include "qemu/osdep.h"
 #include "qemu/cutils.h"
+#include "qemu/unicode.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
 #include "qapi/qmp/qbool.h"
@@ -133,6 +134,10 @@ static QString *qstring_from_escaped_str(JSONParserContext *ctxt,
     const char *ptr = token->str;
     QString *str;
     char quote;
+    int cp;
+    char *end;
+    ssize_t len;
+    char utf8_buf[5];
 
     assert(*ptr == '"' || *ptr == '\'');
     quote = *ptr++;
@@ -194,12 +199,15 @@ static QString *qstring_from_escaped_str(JSONParserContext *ctxt,
                 goto out;
             }
         } else {
-            char dummy[2];
-
-            dummy[0] = *ptr++;
-            dummy[1] = 0;
-
-            qstring_append(str, dummy);
+            cp = mod_utf8_codepoint(ptr, 6, &end);
+            if (cp <= 0) {
+                parse_error(ctxt, token, "invalid UTF-8 sequence in string");
+                goto out;
+            }
+            ptr = end;
+            len = mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp);
+            assert(len >= 0);
+            qstring_append(str, utf8_buf);
         }
     }
 
diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index 69f5a187c9..71c77d2f70 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -152,13 +152,6 @@ static void string_with_quotes(void)
 static void utf8_string(void)
 {
     /*
-     * FIXME Current behavior for invalid UTF-8 sequences is
-     * incorrect.  This test expects current, incorrect results.
-     * They're all marked "bug:" below, and are to be replaced by
-     * correct ones as the bugs get fixed.
-     *
-     * The JSON parser rejects some, but not all invalid sequences.
-     *
      * Problem: we can't easily deal with embedded U+0000.  Parsing
      * the JSON string "this \\u0000" is fun" yields "this \0 is fun",
      * which gets misinterpreted as NUL-terminated "this ".  We should
@@ -177,12 +170,6 @@ static void utf8_string(void)
         /* Expected unparse output, defaults to @json_in */
         const char *json_out;
     } test_cases[] = {
-        /*
-         * Bug markers used here:
-         * - bug: not rejected
-         *   JSON parser fails to reject invalid sequence(s)
-         */
-
         /* 0  Control characters */
         {
             /*
@@ -330,7 +317,7 @@ static void utf8_string(void)
         {
             /* first one beyond Unicode range: U+110000 */
             "\xF4\x90\x80\x80",
-            "\xF4\x90\x80\x80",
+            NULL,
             "\\uFFFD",
         },
         /* 3  Malformed sequences */
@@ -338,49 +325,49 @@ static void utf8_string(void)
         /* 3.1.1  First continuation byte */
         {
             "\x80",
-            "\x80",             /* bug: not rejected */
+            NULL,
             "\\uFFFD",
         },
         /* 3.1.2  Last continuation byte */
         {
             "\xBF",
-            "\xBF",             /* bug: not rejected */
+            NULL,
             "\\uFFFD",
         },
         /* 3.1.3  2 continuation bytes */
         {
             "\x80\xBF",
-            "\x80\xBF",         /* bug: not rejected */
+            NULL,
             "\\uFFFD\\uFFFD",
         },
         /* 3.1.4  3 continuation bytes */
         {
             "\x80\xBF\x80",
-            "\x80\xBF\x80",     /* bug: not rejected */
+            NULL,
             "\\uFFFD\\uFFFD\\uFFFD",
         },
         /* 3.1.5  4 continuation bytes */
         {
             "\x80\xBF\x80\xBF",
-            "\x80\xBF\x80\xBF", /* bug: not rejected */
+            NULL,
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD",
         },
         /* 3.1.6  5 continuation bytes */
         {
             "\x80\xBF\x80\xBF\x80",
-            "\x80\xBF\x80\xBF\x80", /* bug: not rejected */
+            NULL,
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD",
         },
         /* 3.1.7  6 continuation bytes */
         {
             "\x80\xBF\x80\xBF\x80\xBF",
-            "\x80\xBF\x80\xBF\x80\xBF", /* bug: not rejected */
+            NULL,
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD",
         },
         /* 3.1.8  7 continuation bytes */
         {
             "\x80\xBF\x80\xBF\x80\xBF\x80",
-            "\x80\xBF\x80\xBF\x80\xBF\x80", /* bug: not rejected */
+            NULL,
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD",
         },
         /* 3.1.9  Sequence of all 64 possible continuation bytes */
@@ -393,16 +380,7 @@ static void utf8_string(void)
             "\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF"
             "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7"
             "\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF",
-             /* bug: not rejected */
-            "\x80\x81\x82\x83\x84\x85\x86\x87"
-            "\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F"
-            "\x90\x91\x92\x93\x94\x95\x96\x97"
-            "\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F"
-            "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7"
-            "\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF"
-            "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7"
-            "\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF",
-            "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
+            NULL,
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
@@ -410,6 +388,7 @@ static void utf8_string(void)
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
+            "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD",
         },
         /* 3.2  Lonely start characters */
         /* 3.2.1  All 32 first bytes of 2-byte sequences, followed by space */
@@ -418,7 +397,7 @@ static void utf8_string(void)
             "\xC8 \xC9 \xCA \xCB \xCC \xCD \xCE \xCF "
             "\xD0 \xD1 \xD2 \xD3 \xD4 \xD5 \xD6 \xD7 "
             "\xD8 \xD9 \xDA \xDB \xDC \xDD \xDE \xDF ",
-            NULL,               /* bug: accepted partly, see FIXME below */
+            NULL,
             "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD "
             "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD "
             "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD "
@@ -428,16 +407,14 @@ static void utf8_string(void)
         {
             "\xE0 \xE1 \xE2 \xE3 \xE4 \xE5 \xE6 \xE7 "
             "\xE8 \xE9 \xEA \xEB \xEC \xED \xEE \xEF ",
-            /* bug: not rejected */
-            "\xE0 \xE1 \xE2 \xE3 \xE4 \xE5 \xE6 \xE7 "
-            "\xE8 \xE9 \xEA \xEB \xEC \xED \xEE \xEF ",
+            NULL,
             "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD "
             "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD ",
         },
         /* 3.2.3  All 8 first bytes of 4-byte sequences, followed by space */
         {
             "\xF0 \xF1 \xF2 \xF3 \xF4 \xF5 \xF6 \xF7 ",
-            NULL,               /* bug: accepted partly, see FIXME below */
+            NULL,
             "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD ",
         },
         /* 3.2.4  All 4 first bytes of 5-byte sequences, followed by space */
@@ -462,13 +439,13 @@ static void utf8_string(void)
         /* 3.3.2  3-byte sequence with last byte missing (U+0000) */
         {
             "\xE0\x80",
-            "\xE0\x80",         /* bug: not rejected */
+            NULL,
             "\\uFFFD",
         },
         /* 3.3.3  4-byte sequence with last byte missing (U+0000) */
         {
             "\xF0\x80\x80",
-            "\xF0\x80\x80",     /* bug: not rejected */
+            NULL,
             "\\uFFFD",
         },
         /* 3.3.4  5-byte sequence with last byte missing (U+0000) */
@@ -486,13 +463,13 @@ static void utf8_string(void)
         /* 3.3.6  2-byte sequence with last byte missing (U+07FF) */
         {
             "\xDF",
-            "\xDF",             /* bug: not rejected */
+            NULL,
             "\\uFFFD",
         },
         /* 3.3.7  3-byte sequence with last byte missing (U+FFFF) */
         {
             "\xEF\xBF",
-            "\xEF\xBF",         /* bug: not rejected */
+            NULL,
             "\\uFFFD",
         },
         /* 3.3.8  4-byte sequence with last byte missing (U+1FFFFF) */
@@ -517,7 +494,7 @@ static void utf8_string(void)
         {
             "\xC0\xE0\x80\xF0\x80\x80\xF8\x80\x80\x80\xFC\x80\x80\x80\x80"
             "\xDF\xEF\xBF\xF7\xBF\xBF\xFB\xBF\xBF\xBF\xFD\xBF\xBF\xBF\xBF",
-            NULL,               /* bug: accepted partly, see FIXME below */
+            NULL,
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD",
         },
@@ -546,12 +523,12 @@ static void utf8_string(void)
         },
         {
             "\xE0\x80\xAF",
-            "\xE0\x80\xAF",     /* bug: not rejected */
+            NULL,
             "\\uFFFD",
         },
         {
             "\xF0\x80\x80\xAF",
-            "\xF0\x80\x80\xAF", /* bug: not rejected */
+            NULL,
             "\\uFFFD",
         },
         {
@@ -579,7 +556,7 @@ static void utf8_string(void)
         {
             /* \U+07FF */
             "\xE0\x9F\xBF",
-            "\xE0\x9F\xBF",     /* bug: not rejected */
+            NULL,
             "\\uFFFD",
         },
         {
@@ -590,7 +567,7 @@ static void utf8_string(void)
              * also 2.2.3
              */
             "\xF0\x8F\xBF\xBC",
-            "\xF0\x8F\xBF\xBC", /* bug: not rejected */
+            NULL,
             "\\uFFFD",
         },
         {
@@ -615,13 +592,13 @@ static void utf8_string(void)
         {
             /* \U+0000 */
             "\xE0\x80\x80",
-            "\xE0\x80\x80",     /* bug: not rejected */
+            NULL,
             "\\uFFFD",
         },
         {
             /* \U+0000 */
             "\xF0\x80\x80\x80",
-            "\xF0\x80\x80\x80", /* bug: not rejected */
+            NULL,
             "\\uFFFD",
         },
         {
@@ -641,92 +618,92 @@ static void utf8_string(void)
         {
             /* \U+D800 */
             "\xED\xA0\x80",
-            "\xED\xA0\x80",     /* bug: not rejected */
+            NULL,
             "\\uFFFD",
         },
         {
             /* \U+DB7F */
             "\xED\xAD\xBF",
-            "\xED\xAD\xBF",     /* bug: not rejected */
+            NULL,
             "\\uFFFD",
         },
         {
             /* \U+DB80 */
             "\xED\xAE\x80",
-            "\xED\xAE\x80",     /* bug: not rejected */
+            NULL,
             "\\uFFFD",
         },
         {
             /* \U+DBFF */
             "\xED\xAF\xBF",
-            "\xED\xAF\xBF",     /* bug: not rejected */
+            NULL,
             "\\uFFFD",
         },
         {
             /* \U+DC00 */
             "\xED\xB0\x80",
-            "\xED\xB0\x80",     /* bug: not rejected */
+            NULL,
             "\\uFFFD",
         },
         {
             /* \U+DF80 */
             "\xED\xBE\x80",
-            "\xED\xBE\x80",     /* bug: not rejected */
+            NULL,
             "\\uFFFD",
         },
         {
             /* \U+DFFF */
             "\xED\xBF\xBF",
-            "\xED\xBF\xBF",     /* bug: not rejected */
+            NULL,
             "\\uFFFD",
         },
         /* 5.2  Paired UTF-16 surrogates */
         {
             /* \U+D800\U+DC00 */
             "\xED\xA0\x80\xED\xB0\x80",
-            "\xED\xA0\x80\xED\xB0\x80", /* bug: not rejected */
+            NULL,
             "\\uFFFD\\uFFFD",
         },
         {
             /* \U+D800\U+DFFF */
             "\xED\xA0\x80\xED\xBF\xBF",
-            "\xED\xA0\x80\xED\xBF\xBF", /* bug: not rejected */
+            NULL,
             "\\uFFFD\\uFFFD",
         },
         {
             /* \U+DB7F\U+DC00 */
             "\xED\xAD\xBF\xED\xB0\x80",
-            "\xED\xAD\xBF\xED\xB0\x80", /* bug: not rejected */
+            NULL,
             "\\uFFFD\\uFFFD",
         },
         {
             /* \U+DB7F\U+DFFF */
             "\xED\xAD\xBF\xED\xBF\xBF",
-            "\xED\xAD\xBF\xED\xBF\xBF", /* bug: not rejected */
+            NULL,
             "\\uFFFD\\uFFFD",
         },
         {
             /* \U+DB80\U+DC00 */
             "\xED\xAE\x80\xED\xB0\x80",
-            "\xED\xAE\x80\xED\xB0\x80", /* bug: not rejected */
+            NULL,
             "\\uFFFD\\uFFFD",
         },
         {
             /* \U+DB80\U+DFFF */
             "\xED\xAE\x80\xED\xBF\xBF",
-            "\xED\xAE\x80\xED\xBF\xBF", /* bug: not rejected */
+            NULL,
             "\\uFFFD\\uFFFD",
         },
         {
             /* \U+DBFF\U+DC00 */
             "\xED\xAF\xBF\xED\xB0\x80",
-            "\xED\xAF\xBF\xED\xB0\x80", /* bug: not rejected */
+            NULL,
             "\\uFFFD\\uFFFD",
         },
         {
             /* \U+DBFF\U+DFFF */
             "\xED\xAF\xBF\xED\xBF\xBF",
-            "\xED\xAF\xBF\xED\xBF\xBF", /* bug: not rejected */
+            NULL,
             "\\uFFFD\\uFFFD",
         },
         /* 5.3  Other illegal code positions */
@@ -734,25 +711,25 @@ static void utf8_string(void)
         {
             /* \U+FFFE */
             "\xEF\xBF\xBE",
-            "\xEF\xBF\xBE",     /* bug: not rejected */
+            NULL,
             "\\uFFFD",
         },
         {
             /* \U+FFFF */
             "\xEF\xBF\xBF",
-            "\xEF\xBF\xBF",     /* bug: not rejected */
+            NULL,
             "\\uFFFD",
         },
         {
             /* U+FDD0 */
             "\xEF\xB7\x90",
-            "\xEF\xB7\x90",     /* bug: not rejected */
+            NULL,
             "\\uFFFD",
         },
         {
             /* U+FDEF */
             "\xEF\xB7\xAF",
-            "\xEF\xB7\xAF",     /* bug: not rejected */
+            NULL,
             "\\uFFFD",
         },
         /* Plane 1 .. 16 noncharacters */
@@ -774,23 +751,7 @@ static void utf8_string(void)
             "\xF3\xAF\xBF\xBE\xF3\xAF\xBF\xBF"
             "\xF3\xBF\xBF\xBE\xF3\xBF\xBF\xBF"
             "\xF4\x8F\xBF\xBE\xF4\x8F\xBF\xBF",
-            /* bug: not rejected */
-            "\xF0\x9F\xBF\xBE\xF0\x9F\xBF\xBF"
-            "\xF0\xAF\xBF\xBE\xF0\xAF\xBF\xBF"
-            "\xF0\xBF\xBF\xBE\xF0\xBF\xBF\xBF"
-            "\xF1\x8F\xBF\xBE\xF1\x8F\xBF\xBF"
-            "\xF1\x9F\xBF\xBE\xF1\x9F\xBF\xBF"
-            "\xF1\xAF\xBF\xBE\xF1\xAF\xBF\xBF"
-            "\xF1\xBF\xBF\xBE\xF1\xBF\xBF\xBF"
-            "\xF2\x8F\xBF\xBE\xF2\x8F\xBF\xBF"
-            "\xF2\x9F\xBF\xBE\xF2\x9F\xBF\xBF"
-            "\xF2\xAF\xBF\xBE\xF2\xAF\xBF\xBF"
-            "\xF2\xBF\xBF\xBE\xF2\xBF\xBF\xBF"
-            "\xF3\x8F\xBF\xBE\xF3\x8F\xBF\xBF"
-            "\xF3\x9F\xBF\xBE\xF3\x9F\xBF\xBF"
-            "\xF3\xAF\xBF\xBE\xF3\xAF\xBF\xBF"
-            "\xF3\xBF\xBF\xBE\xF3\xBF\xBF\xBF"
-            "\xF4\x8F\xBF\xBE\xF4\x8F\xBF\xBF",
+            NULL,
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
             "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
@@ -829,14 +790,6 @@ static void utf8_string(void)
                     }
                     in = strndup(tail, end - tail);
                     str = from_json_str(in, j, NULL);
-                    /*
-                     * FIXME JSON parser accepts invalid sequence
-                     * starting with \xC2..\xF4
-                     */
-                    if (*in >= '\xC2' && *in <= '\xF4') {
-                        g_free(str);
-                        str = NULL;
-                    }
                     g_assert(!str);
                     g_free(in);
                 }
diff --git a/util/unicode.c b/util/unicode.c
index a812a35171..8580bc598b 100644
--- a/util/unicode.c
+++ b/util/unicode.c
@@ -13,6 +13,21 @@
 #include "qemu/osdep.h"
 #include "qemu/unicode.h"
 
+static bool is_valid_codepoint(int codepoint)
+{
+    if (codepoint > 0x10FFFFu) {
+        return false;            /* beyond Unicode range */
+    }
+    if ((codepoint >= 0xFDD0 && codepoint <= 0xFDEF)
+        || (codepoint & 0xFFFE) == 0xFFFE) {
+        return false;            /* noncharacter */
+    }
+    if (codepoint >= 0xD800 && codepoint <= 0xDFFF) {
+        return false;            /* surrogate code point */
+    }
+    return true;
+}
+
 /**
  * mod_utf8_codepoint:
  * @s: string encoded in modified UTF-8
@@ -83,13 +98,8 @@ int mod_utf8_codepoint(const char *s, size_t n, char **end)
             cp <<= 6;
             cp |= byte & 0x3F;
         }
-        if (cp > 0x10FFFF) {
-            cp = -1;            /* beyond Unicode range */
-        } else if ((cp >= 0xFDD0 && cp <= 0xFDEF)
-                   || (cp & 0xFFFE) == 0xFFFE) {
-            cp = -1;            /* noncharacter */
-        } else if (cp >= 0xD800 && cp <= 0xDFFF) {
-            cp = -1;            /* surrogate code point */
+        if (!is_valid_codepoint(cp)) {
+            cp = -1;
         } else if (cp < min_cp[len - 2] && !(cp == 0 && len == 2)) {
             cp = -1;            /* overlong, not \xC0\x80 */
         }
@@ -99,3 +109,48 @@ out:
     *end = (char *)p;
     return cp;
 }
+
+/**
+ * mod_utf8_encode:
+ * @buf: Destination buffer
+ * @bufsz: size of @buf, at least 5.
+ * @codepoint: Unicode codepoint to encode
+ *
+ * Convert Unicode codepoint @codepoint to modified UTF-8.
+ *
+ * Returns: the length of the UTF-8 sequence on success, -1 when
+ * @codepoint is invalid.
+ */
+ssize_t mod_utf8_encode(char buf[], size_t bufsz, int codepoint)
+{
+    assert(bufsz >= 5);
+
+    if (!is_valid_codepoint(codepoint)) {
+        return -1;
+    }
+
+    if (codepoint > 0 && codepoint <= 0x7F) {
+        buf[0] = codepoint & 0x7F;
+        buf[1] = 0;
+        return 1;
+    }
+    if (codepoint <= 0x7FF) {
+        buf[0] = 0xC0 | ((codepoint >> 6) & 0x1F);
+        buf[1] = 0x80 | (codepoint & 0x3F);
+        buf[2] = 0;
+        return 2;
+    }
+    if (codepoint <= 0xFFFF) {
+        buf[0] = 0xE0 | ((codepoint >> 12) & 0x0F);
+        buf[1] = 0x80 | ((codepoint >> 6) & 0x3F);
+        buf[2] = 0x80 | (codepoint & 0x3F);
+        buf[3] = 0;
+        return 3;
+    }
+    buf[0] = 0xF0 | ((codepoint >> 18) & 0x07);
+    buf[1] = 0x80 | ((codepoint >> 12) & 0x3F);
+    buf[2] = 0x80 | ((codepoint >> 6) & 0x3F);
+    buf[3] = 0x80 | (codepoint & 0x3F);
+    buf[4] = 0;
+    return 4;
+}
-- 
cgit v1.2.3-55-g7522


From 4b1c0cd7c7f9f9cf2e46c0a9c9cd88b2cba3decd Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Thu, 23 Aug 2018 18:39:52 +0200
Subject: json: Accept overlong \xC0\x80 as U+0000 ("modified UTF-8")

Since the JSON grammar doesn't accept U+0000 anywhere, this merely
exchanges one kind of parse error for another.  It's purely for
consistency with qobject_to_json(), which accepts \xC0\x80 (see commit
e2ec3f97680).

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-26-armbru@redhat.com>
---
 qobject/json-lexer.c  | 2 +-
 qobject/json-parser.c | 2 +-
 tests/check-qjson.c   | 8 +-------
 3 files changed, 3 insertions(+), 9 deletions(-)

(limited to 'tests/check-qjson.c')

diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c
index 93fa2737e6..4c402f62d3 100644
--- a/qobject/json-lexer.c
+++ b/qobject/json-lexer.c
@@ -93,7 +93,7 @@
  *   interpolation = %((l|ll|I64)[du]|[ipsf])
  *
  * Note:
- * - Input must be encoded in UTF-8.
+ * - Input must be encoded in modified UTF-8.
  * - Decoding and validating is left to the parser.
  */
 
diff --git a/qobject/json-parser.c b/qobject/json-parser.c
index b77931614b..a9b227f56c 100644
--- a/qobject/json-parser.c
+++ b/qobject/json-parser.c
@@ -200,7 +200,7 @@ static QString *qstring_from_escaped_str(JSONParserContext *ctxt,
             }
         } else {
             cp = mod_utf8_codepoint(ptr, 6, &end);
-            if (cp <= 0) {
+            if (cp < 0) {
                 parse_error(ctxt, token, "invalid UTF-8 sequence in string");
                 goto out;
             }
diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index 71c77d2f70..3abf12b4d2 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -152,12 +152,6 @@ static void string_with_quotes(void)
 static void utf8_string(void)
 {
     /*
-     * Problem: we can't easily deal with embedded U+0000.  Parsing
-     * the JSON string "this \\u0000" is fun" yields "this \0 is fun",
-     * which gets misinterpreted as NUL-terminated "this ".  We should
-     * consider using overlong encoding \xC0\x80 for U+0000 ("modified
-     * UTF-8").
-     *
      * Most test cases are scraped from Markus Kuhn's UTF-8 decoder
      * capability and stress test at
      * http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
@@ -586,7 +580,7 @@ static void utf8_string(void)
         {
             /* \U+0000 */
             "\xC0\x80",
-            NULL,
+            "\xC0\x80",
             "\\u0000",
         },
         {
-- 
cgit v1.2.3-55-g7522


From 46a628b1398ae6a58d6847223736431225c4c0cc Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Thu, 23 Aug 2018 18:39:55 +0200
Subject: json: Reject invalid \uXXXX, fix \u0000

The JSON parser translates invalid \uXXXX to garbage instead of
rejecting it, and swallows \u0000.

Fix by using mod_utf8_encode() instead of flawed wchar_to_utf8().

Valid surrogate pairs are now differently broken: they're rejected
instead of translated to garbage.  The next commit will fix them.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-29-armbru@redhat.com>
---
 qobject/json-parser.c | 35 ++++++-----------------------------
 tests/check-qjson.c   | 41 +++++++++++------------------------------
 2 files changed, 17 insertions(+), 59 deletions(-)

(limited to 'tests/check-qjson.c')

diff --git a/qobject/json-parser.c b/qobject/json-parser.c
index 9cb363f7e1..e49da192fe 100644
--- a/qobject/json-parser.c
+++ b/qobject/json-parser.c
@@ -64,34 +64,6 @@ static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
     error_setg(&ctxt->err, "JSON parse error, %s", message);
 }
 
-/**
- * String helpers
- *
- * These helpers are used to unescape strings.
- */
-static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
-{
-    if (wchar <= 0x007F) {
-        BUG_ON(buffer_length < 2);
-
-        buffer[0] = wchar & 0x7F;
-        buffer[1] = 0;
-    } else if (wchar <= 0x07FF) {
-        BUG_ON(buffer_length < 3);
-
-        buffer[0] = 0xC0 | ((wchar >> 6) & 0x1F);
-        buffer[1] = 0x80 | (wchar & 0x3F);
-        buffer[2] = 0;
-    } else {
-        BUG_ON(buffer_length < 4);
-
-        buffer[0] = 0xE0 | ((wchar >> 12) & 0x0F);
-        buffer[1] = 0x80 | ((wchar >> 6) & 0x3F);
-        buffer[2] = 0x80 | (wchar & 0x3F);
-        buffer[3] = 0;
-    }
-}
-
 static int hex2decimal(char ch)
 {
     if (ch >= '0' && ch <= '9') {
@@ -197,7 +169,12 @@ static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
                     ptr++;
                 }
 
-                wchar_to_utf8(cp, utf8_buf, sizeof(utf8_buf));
+                if (mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp) < 0) {
+                    parse_error(ctxt, token,
+                                "\\u%.4s is not a valid Unicode character",
+                                ptr - 3);
+                    goto out;
+                }
                 qstring_append(str, utf8_buf);
                 break;
             default:
diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index 3abf12b4d2..4abb5847ad 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -64,7 +64,7 @@ static void escaped_string(void)
         { "triple byte utf-8 \\u20AC", "triple byte utf-8 \xe2\x82\xac" },
         { "quadruple byte utf-8 \\uD834\\uDD1E", /* U+1D11E */
           /* bug: want \xF0\x9D\x84\x9E */
-          "quadruple byte utf-8 \xED\xA0\xB4\xED\xB4\x9E", .skip = 1 },
+          NULL },
         { "\\", NULL },
         { "\\z", NULL },
         { "\\ux", NULL },
@@ -72,35 +72,16 @@ static void escaped_string(void)
         { "\\u12x", NULL },
         { "\\u123x", NULL },
         { "\\u12345", "\341\210\2645" },
-        { "\\u0000x", "x", .skip = 1}, /* bug: want \xC0\x80x */
-        { "unpaired leading surrogate \\uD800",
-          /* bug: not rejected */
-          "unpaired leading surrogate \355\240\200", .skip = 1 },
-        { "unpaired leading surrogate \\uD800\\uCAFE",
-          /* bug: not rejected */
-          "unpaired leading surrogate \355\240\200\354\253\276", .skip = 1 },
-        { "unpaired leading surrogate \\uD800\\uD801\\uDC02",
-          /* bug: not rejected */
-          "unpaired leading surrogate \355\240\200\355\240\201\355\260\202",
-          .skip = 1 },
-        { "unpaired trailing surrogate \\uDC00",
-          /* bug: not rejected */
-          "unpaired trailing surrogate \355\260\200", .skip = 1},
-        { "backward surrogate pair \\uDC00\\uD800",
-          /* bug: not rejected */
-          "backward surrogate pair \355\260\200\355\240\200", .skip = 1},
-        { "noncharacter U+FDD0 \\uFDD0",
-          /* bug: not rejected */
-          "noncharacter U+FDD0 \xEF\xB7\x90", .skip = 1},
-        { "noncharacter U+FDEF \\uFDEF",
-          /* bug: not rejected */
-          "noncharacter U+FDEF \xEF\xB7\xAF", .skip = 1},
-        { "noncharacter U+1FFFE \\uD87F\\uDFFE",
-          /* bug: not rejected */
-          "noncharacter U+1FFFE \xED\xA1\xBF\xED\xBF\xBE", .skip = 1},
-        { "noncharacter U+10FFFF \\uDC3F\\uDFFF",
-          /* bug: not rejected */
-          "noncharacter U+10FFFF \xED\xB0\xBF\xED\xBF\xBF", .skip = 1},
+        { "\\u0000x", "\xC0\x80x" },
+        { "unpaired leading surrogate \\uD800", NULL },
+        { "unpaired leading surrogate \\uD800\\uCAFE", NULL },
+        { "unpaired leading surrogate \\uD800\\uD801\\uDC02", NULL },
+        { "unpaired trailing surrogate \\uDC00", NULL },
+        { "backward surrogate pair \\uDC00\\uD800", NULL },
+        { "noncharacter U+FDD0 \\uFDD0", NULL },
+        { "noncharacter U+FDEF \\uFDEF", NULL },
+        { "noncharacter U+1FFFE \\uD87F\\uDFFE", NULL },
+        { "noncharacter U+10FFFF \\uDC3F\\uDFFF", NULL },
         {}
     };
     int i, j;
-- 
cgit v1.2.3-55-g7522


From dc45a07c3628b82817a96fcb7df3d211d901af5d Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Thu, 23 Aug 2018 18:39:56 +0200
Subject: json: Fix \uXXXX for surrogate pairs

The JSON parser treats each half of a surrogate pair as an unpaired
surrogate.  Fix it to recognize surrogate pairs.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-30-armbru@redhat.com>
---
 qobject/json-parser.c | 60 +++++++++++++++++++++++++++++++++------------------
 tests/check-qjson.c   |  3 +--
 2 files changed, 40 insertions(+), 23 deletions(-)

(limited to 'tests/check-qjson.c')

diff --git a/qobject/json-parser.c b/qobject/json-parser.c
index e49da192fe..73e6ad7458 100644
--- a/qobject/json-parser.c
+++ b/qobject/json-parser.c
@@ -64,16 +64,27 @@ static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
     error_setg(&ctxt->err, "JSON parse error, %s", message);
 }
 
-static int hex2decimal(char ch)
+static int cvt4hex(const char *s)
 {
-    if (ch >= '0' && ch <= '9') {
-        return (ch - '0');
-    } else if (ch >= 'a' && ch <= 'f') {
-        return 10 + (ch - 'a');
-    } else if (ch >= 'A' && ch <= 'F') {
-        return 10 + (ch - 'A');
+    int cp, i;
+
+    cp = 0;
+    for (i = 0; i < 4; i++) {
+        if (!qemu_isxdigit(s[i])) {
+            return -1;
+        }
+        cp <<= 4;
+        if (s[i] >= '0' && s[i] <= '9') {
+            cp |= s[i] - '0';
+        } else if (s[i] >= 'a' && s[i] <= 'f') {
+            cp |= 10 + s[i] - 'a';
+        } else if (s[i] >= 'A' && s[i] <= 'F') {
+            cp |= 10 + s[i] - 'A';
+        } else {
+            return -1;
+        }
     }
-    abort();
+    return cp;
 }
 
 /**
@@ -115,7 +126,8 @@ static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
     const char *ptr = token->str;
     QString *str;
     char quote;
-    int cp, i;
+    const char *beg;
+    int cp, trailing;
     char *end;
     ssize_t len;
     char utf8_buf[5];
@@ -127,7 +139,7 @@ static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
     while (*ptr != quote) {
         assert(*ptr);
         if (*ptr == '\\') {
-            ptr++;
+            beg = ptr++;
             switch (*ptr++) {
             case '"':
                 qstring_append_chr(str, '"');
@@ -157,22 +169,28 @@ static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
                 qstring_append_chr(str, '\t');
                 break;
             case 'u':
-                cp = 0;
-                for (i = 0; i < 4; i++) {
-                    if (!qemu_isxdigit(*ptr)) {
-                        parse_error(ctxt, token,
-                                    "invalid hex escape sequence in string");
-                        goto out;
+                cp = cvt4hex(ptr);
+                ptr += 4;
+
+                /* handle surrogate pairs */
+                if (cp >= 0xD800 && cp <= 0xDBFF
+                    && ptr[0] == '\\' && ptr[1] == 'u') {
+                    /* leading surrogate followed by \u */
+                    cp = 0x10000 + ((cp & 0x3FF) << 10);
+                    trailing = cvt4hex(ptr + 2);
+                    if (trailing >= 0xDC00 && trailing <= 0xDFFF) {
+                        /* followed by trailing surrogate */
+                        cp |= trailing & 0x3FF;
+                        ptr += 6;
+                    } else {
+                        cp = -1; /* invalid */
                     }
-                    cp <<= 4;
-                    cp |= hex2decimal(*ptr);
-                    ptr++;
                 }
 
                 if (mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp) < 0) {
                     parse_error(ctxt, token,
-                                "\\u%.4s is not a valid Unicode character",
-                                ptr - 3);
+                                "%.*s is not a valid Unicode character",
+                                (int)(ptr - beg), beg);
                     goto out;
                 }
                 qstring_append(str, utf8_buf);
diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index 4abb5847ad..343f8af36a 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -63,8 +63,7 @@ static void escaped_string(void)
         { "double byte utf-8 \\u00A2", "double byte utf-8 \xc2\xa2" },
         { "triple byte utf-8 \\u20AC", "triple byte utf-8 \xe2\x82\xac" },
         { "quadruple byte utf-8 \\uD834\\uDD1E", /* U+1D11E */
-          /* bug: want \xF0\x9D\x84\x9E */
-          NULL },
+          "quadruple byte utf-8 \xF0\x9D\x84\x9E" },
         { "\\", NULL },
         { "\\z", NULL },
         { "\\ux", NULL },
-- 
cgit v1.2.3-55-g7522


From c473c379e1069079542c51f7063d44c2692abe6b Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Thu, 23 Aug 2018 18:39:57 +0200
Subject: check-qjson: Fix and enable utf8_string()'s disabled part

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-31-armbru@redhat.com>
---
 tests/check-qjson.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

(limited to 'tests/check-qjson.c')

diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index 343f8af36a..defc21fa04 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -776,15 +776,10 @@ static void utf8_string(void)
             qobject_unref(str);
             g_free(jstr);
 
-            /*
-             * Parse @json_out right back
-             * Disabled, because qobject_from_json() is buggy, and I can't
-             * be bothered to add the expected incorrect results.
-             * FIXME Enable once these bugs have been fixed.
-             */
-            if (0 && json_out != json_in) {
+            /* Parse @json_out right back, unless it has replacements */
+            if (!strstr(json_out, "\\uFFFD")) {
                 str = from_json_str(json_out, j, &error_abort);
-                g_assert_cmpstr(qstring_get_try_str(str), ==, utf8_out);
+                g_assert_cmpstr(qstring_get_try_str(str), ==, utf8_in);
             }
         }
     }
-- 
cgit v1.2.3-55-g7522


From 62815d85aed71eff7b6c3a524705180fb04f5d30 Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Thu, 23 Aug 2018 18:40:01 +0200
Subject: json: Redesign the callback to consume JSON values

The classical way to structure parser and lexer is to have the client
call the parser to get an abstract syntax tree, the parser call the
lexer to get the next token, and the lexer call some function to get
input characters.

Another way to structure them would be to have the client feed
characters to the lexer, the lexer feed tokens to the parser, and the
parser feed abstract syntax trees to some callback provided by the
client.  This way is more easily integrated into an event loop that
dispatches input characters as they arrive.

Our JSON parser is kind of between the two.  The lexer feeds tokens to
a "streamer" instead of a real parser.  The streamer accumulates
tokens until it has the sequence of tokens that comprise a single JSON
value (it counts curly braces and square brackets to decide).  It
feeds those token sequences to a callback provided by the client.  The
callback passes each token sequence to the parser, and gets back an
abstract syntax tree.

I figure it was done that way to make a straightforward recursive
descent parser possible.  "Get next token" becomes "pop the first
token off the token sequence".  Drawback: we need to store a complete
token sequence.  Each token eats 13 + input characters + malloc
overhead bytes.

Observations:

1. This is not the only way to use recursive descent.  If we replaced
   "get next token" by a coroutine yield, we could do without a
   streamer.

2. The lexer reports errors by passing a JSON_ERROR token to the
   streamer.  This communicates the offending input characters and
   their location, but no more.

3. The streamer reports errors by passing a null token sequence to the
   callback.  The (already poor) lexical error information is thrown
   away.

4. Having the callback receive a token sequence duplicates the code to
   convert token sequence to abstract syntax tree in every callback.

5. Known bug: the streamer silently drops incomplete token sequences.

This commit rectifies 4. by lifting the call of the parser from the
callbacks into the streamer.  Later commits will address 3. and 5.

The lifting removes a bug from qjson.c's parse_json(): it passed a
pointer to a non-null Error * in certain cases, as demonstrated by
check-qjson.c.

json_parser_parse() is now unused.  It's a stupid wrapper around
json_parser_parse_err().  Drop it, and rename json_parser_parse_err()
to json_parser_parse().

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-35-armbru@redhat.com>
---
 include/qapi/qmp/json-parser.h   |  3 +--
 include/qapi/qmp/json-streamer.h |  8 ++++++--
 monitor.c                        | 18 ++++++++----------
 qapi/qmp-dispatch.c              |  1 -
 qga/main.c                       | 12 +++---------
 qobject/json-parser.c            |  7 +------
 qobject/json-streamer.c          | 19 +++++++++++--------
 qobject/qjson.c                  | 14 +++++---------
 tests/check-qjson.c              |  1 -
 tests/libqtest.c                 | 10 ++++------
 10 files changed, 39 insertions(+), 54 deletions(-)

(limited to 'tests/check-qjson.c')

diff --git a/include/qapi/qmp/json-parser.h b/include/qapi/qmp/json-parser.h
index 102f5c0068..a34209db7a 100644
--- a/include/qapi/qmp/json-parser.h
+++ b/include/qapi/qmp/json-parser.h
@@ -16,7 +16,6 @@
 
 #include "qemu-common.h"
 
-QObject *json_parser_parse(GQueue *tokens, va_list *ap);
-QObject *json_parser_parse_err(GQueue *tokens, va_list *ap, Error **errp);
+QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp);
 
 #endif
diff --git a/include/qapi/qmp/json-streamer.h b/include/qapi/qmp/json-streamer.h
index 7922e185a5..e162fd01da 100644
--- a/include/qapi/qmp/json-streamer.h
+++ b/include/qapi/qmp/json-streamer.h
@@ -25,7 +25,9 @@ typedef struct JSONToken {
 
 typedef struct JSONMessageParser
 {
-    void (*emit)(struct JSONMessageParser *parser, GQueue *tokens);
+    void (*emit)(void *opaque, QObject *json, Error *err);
+    void *opaque;
+    va_list *ap;
     JSONLexer lexer;
     int brace_count;
     int bracket_count;
@@ -37,7 +39,9 @@ void json_message_process_token(JSONLexer *lexer, GString *input,
                                 JSONTokenType type, int x, int y);
 
 void json_message_parser_init(JSONMessageParser *parser,
-                              void (*func)(JSONMessageParser *, GQueue *));
+                              void (*emit)(void *opaque, QObject *json,
+                                           Error *err),
+                              void *opaque, va_list *ap);
 
 void json_message_parser_feed(JSONMessageParser *parser,
                              const char *buffer, size_t size);
diff --git a/monitor.c b/monitor.c
index 94f673511b..08f799a7bb 100644
--- a/monitor.c
+++ b/monitor.c
@@ -59,7 +59,6 @@
 #include "qapi/qmp/qstring.h"
 #include "qapi/qmp/qjson.h"
 #include "qapi/qmp/json-streamer.h"
-#include "qapi/qmp/json-parser.h"
 #include "qapi/qmp/qlist.h"
 #include "qom/object_interfaces.h"
 #include "trace-root.h"
@@ -4256,18 +4255,15 @@ static void monitor_qmp_bh_dispatcher(void *data)
 
 #define  QMP_REQ_QUEUE_LEN_MAX  (8)
 
-static void handle_qmp_command(JSONMessageParser *parser, GQueue *tokens)
+static void handle_qmp_command(void *opaque, QObject *req, Error *err)
 {
-    QObject *req, *id = NULL;
+    Monitor *mon = opaque;
+    QObject *id = NULL;
     QDict *qdict;
-    MonitorQMP *mon_qmp = container_of(parser, MonitorQMP, parser);
-    Monitor *mon = container_of(mon_qmp, Monitor, qmp);
-    Error *err = NULL;
     QMPRequest *req_obj;
 
-    req = json_parser_parse_err(tokens, NULL, &err);
     if (!req && !err) {
-        /* json_parser_parse_err() sucks: can fail without setting @err */
+        /* json_parser_parse() sucks: can fail without setting @err */
         error_setg(&err, QERR_JSON_PARSING);
     }
 
@@ -4465,7 +4461,8 @@ static void monitor_qmp_event(void *opaque, int event)
         monitor_qmp_response_flush(mon);
         monitor_qmp_cleanup_queues(mon);
         json_message_parser_destroy(&mon->qmp.parser);
-        json_message_parser_init(&mon->qmp.parser, handle_qmp_command);
+        json_message_parser_init(&mon->qmp.parser, handle_qmp_command,
+                                 mon, NULL);
         mon_refcount--;
         monitor_fdsets_cleanup();
         break;
@@ -4683,7 +4680,8 @@ void monitor_init(Chardev *chr, int flags)
 
     if (monitor_is_qmp(mon)) {
         qemu_chr_fe_set_echo(&mon->chr, true);
-        json_message_parser_init(&mon->qmp.parser, handle_qmp_command);
+        json_message_parser_init(&mon->qmp.parser, handle_qmp_command,
+                                 mon, NULL);
         if (mon->use_io_thread) {
             /*
              * Make sure the old iowatch is gone.  It's possible when
diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c
index 6f2d466596..d8da1a62de 100644
--- a/qapi/qmp-dispatch.c
+++ b/qapi/qmp-dispatch.c
@@ -14,7 +14,6 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qapi/qmp/dispatch.h"
-#include "qapi/qmp/json-parser.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qjson.h"
 #include "qapi/qmp/qbool.h"
diff --git a/qga/main.c b/qga/main.c
index 87372d40ef..2fc49d00d8 100644
--- a/qga/main.c
+++ b/qga/main.c
@@ -19,7 +19,6 @@
 #include <sys/wait.h>
 #endif
 #include "qapi/qmp/json-streamer.h"
-#include "qapi/qmp/json-parser.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qjson.h"
 #include "qapi/qmp/qstring.h"
@@ -597,18 +596,13 @@ static void process_command(GAState *s, QDict *req)
 }
 
 /* handle requests/control events coming in over the channel */
-static void process_event(JSONMessageParser *parser, GQueue *tokens)
+static void process_event(void *opaque, QObject *obj, Error *err)
 {
-    GAState *s = container_of(parser, GAState, parser);
-    QObject *obj;
+    GAState *s = opaque;
     QDict *req, *rsp;
-    Error *err = NULL;
     int ret;
 
-    g_assert(s && parser);
-
     g_debug("process_event: called");
-    obj = json_parser_parse_err(tokens, NULL, &err);
     if (err) {
         goto err;
     }
@@ -1320,7 +1314,7 @@ static int run_agent(GAState *s, GAConfig *config, int socket_activation)
     s->command_state = ga_command_state_new();
     ga_command_state_init(s, s->command_state);
     ga_command_state_init_all(s->command_state);
-    json_message_parser_init(&s->parser, process_event);
+    json_message_parser_init(&s->parser, process_event, s, NULL);
 
 #ifndef _WIN32
     if (!register_signal_handlers()) {
diff --git a/qobject/json-parser.c b/qobject/json-parser.c
index 7bfa08200c..95fa348e21 100644
--- a/qobject/json-parser.c
+++ b/qobject/json-parser.c
@@ -541,12 +541,7 @@ static QObject *parse_value(JSONParserContext *ctxt, va_list *ap)
     }
 }
 
-QObject *json_parser_parse(GQueue *tokens, va_list *ap)
-{
-    return json_parser_parse_err(tokens, ap, NULL);
-}
-
-QObject *json_parser_parse_err(GQueue *tokens, va_list *ap, Error **errp)
+QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp)
 {
     JSONParserContext ctxt = { .buf = tokens };
     QObject *result;
diff --git a/qobject/json-streamer.c b/qobject/json-streamer.c
index 9f57ebf2bd..7fd0ff8756 100644
--- a/qobject/json-streamer.c
+++ b/qobject/json-streamer.c
@@ -14,6 +14,7 @@
 #include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "qapi/qmp/json-lexer.h"
+#include "qapi/qmp/json-parser.h"
 #include "qapi/qmp/json-streamer.h"
 
 #define MAX_TOKEN_SIZE (64ULL << 20)
@@ -38,8 +39,9 @@ void json_message_process_token(JSONLexer *lexer, GString *input,
                                 JSONTokenType type, int x, int y)
 {
     JSONMessageParser *parser = container_of(lexer, JSONMessageParser, lexer);
+    Error *err = NULL;
     JSONToken *token;
-    GQueue *tokens;
+    QObject *json;
 
     switch (type) {
     case JSON_LCURLY:
@@ -97,19 +99,20 @@ out_emit:
     /* send current list of tokens to parser and reset tokenizer */
     parser->brace_count = 0;
     parser->bracket_count = 0;
-    /* parser->emit takes ownership of parser->tokens.  Remove our own
-     * reference to parser->tokens before handing it out to parser->emit.
-     */
-    tokens = parser->tokens;
+    json = json_parser_parse(parser->tokens, parser->ap, &err);
     parser->tokens = g_queue_new();
-    parser->emit(parser, tokens);
     parser->token_size = 0;
+    parser->emit(parser->opaque, json, err);
 }
 
 void json_message_parser_init(JSONMessageParser *parser,
-                              void (*func)(JSONMessageParser *, GQueue *))
+                              void (*emit)(void *opaque, QObject *json,
+                                           Error *err),
+                              void *opaque, va_list *ap)
 {
-    parser->emit = func;
+    parser->emit = emit;
+    parser->opaque = opaque;
+    parser->ap = ap;
     parser->brace_count = 0;
     parser->bracket_count = 0;
     parser->tokens = g_queue_new();
diff --git a/qobject/qjson.c b/qobject/qjson.c
index ab4040f235..7395556069 100644
--- a/qobject/qjson.c
+++ b/qobject/qjson.c
@@ -13,8 +13,6 @@
 
 #include "qemu/osdep.h"
 #include "qapi/error.h"
-#include "qapi/qmp/json-lexer.h"
-#include "qapi/qmp/json-parser.h"
 #include "qapi/qmp/json-streamer.h"
 #include "qapi/qmp/qjson.h"
 #include "qapi/qmp/qbool.h"
@@ -27,16 +25,16 @@
 typedef struct JSONParsingState
 {
     JSONMessageParser parser;
-    va_list *ap;
     QObject *result;
     Error *err;
 } JSONParsingState;
 
-static void parse_json(JSONMessageParser *parser, GQueue *tokens)
+static void consume_json(void *opaque, QObject *json, Error *err)
 {
-    JSONParsingState *s = container_of(parser, JSONParsingState, parser);
+    JSONParsingState *s = opaque;
 
-    s->result = json_parser_parse_err(tokens, s->ap, &s->err);
+    s->result = json;
+    error_propagate(&s->err, err);
 }
 
 /*
@@ -54,9 +52,7 @@ static QObject *qobject_from_jsonv(const char *string, va_list *ap,
 {
     JSONParsingState state = {};
 
-    state.ap = ap;
-
-    json_message_parser_init(&state.parser, parse_json);
+    json_message_parser_init(&state.parser, consume_json, &state, ap);
     json_message_parser_feed(&state.parser, string, strlen(string));
     json_message_parser_flush(&state.parser);
     json_message_parser_destroy(&state.parser);
diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index defc21fa04..604886a1a2 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -1438,7 +1438,6 @@ static void multiple_values(void)
     qobject_unref(obj);
 
     /* BUG simultaneously succeeds and fails */
-    /* BUG calls json_parser_parse() with errp pointing to non-null */
     obj = qobject_from_json("} true", &err);
     g_assert(qbool_get_bool(qobject_to(QBool, obj)));
     error_free_or_abort(&err);
diff --git a/tests/libqtest.c b/tests/libqtest.c
index af2a24e796..1f3b0cb1b1 100644
--- a/tests/libqtest.c
+++ b/tests/libqtest.c
@@ -21,9 +21,9 @@
 #include <sys/un.h>
 
 #include "libqtest.h"
+#include "qemu-common.h"
 #include "qemu/cutils.h"
 #include "qapi/error.h"
-#include "qapi/qmp/json-parser.h"
 #include "qapi/qmp/json-streamer.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qjson.h"
@@ -446,12 +446,10 @@ typedef struct {
     QDict *response;
 } QMPResponseParser;
 
-static void qmp_response(JSONMessageParser *parser, GQueue *tokens)
+static void qmp_response(void *opaque, QObject *obj, Error *err)
 {
-    QMPResponseParser *qmp = container_of(parser, QMPResponseParser, parser);
-    QObject *obj;
+    QMPResponseParser *qmp = opaque;
 
-    obj = json_parser_parse(tokens, NULL);
     if (!obj) {
         fprintf(stderr, "QMP JSON response parsing failed\n");
         abort();
@@ -468,7 +466,7 @@ QDict *qmp_fd_receive(int fd)
     bool log = getenv("QTEST_LOG") != NULL;
 
     qmp.response = NULL;
-    json_message_parser_init(&qmp.parser, qmp_response);
+    json_message_parser_init(&qmp.parser, qmp_response, &qmp, NULL);
     while (!qmp.response) {
         ssize_t len;
         char c;
-- 
cgit v1.2.3-55-g7522


From 84a56f38b23440cb3127eaffe4e495826a29f18c Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Thu, 23 Aug 2018 18:40:06 +0200
Subject: json: Pass lexical errors and limit violations to callback

The callback to consume JSON values takes QObject *json, Error *err.
If both are null, the callback is supposed to make up an error by
itself.  This sucks.

qjson.c's consume_json() neglects to do so, which makes
qobject_from_json() return null instead of failing.  I consider that a
bug.

The culprit is json_message_process_token(): it passes two null
pointers when it runs into a lexical error or a limit violation.  Fix
it to pass a proper Error object then.  Update the callbacks:

* monitor.c's handle_qmp_command(): the code to make up an error is
  now dead, drop it.

* qga/main.c's process_event(): lumps the "both null" case together
  with the "not a JSON object" case.  The former is now gone.  The
  error message "Invalid JSON syntax" is misleading for the latter.
  Improve it to "Input must be a JSON object".

* qobject/qjson.c's consume_json(): no update; check-qjson
  demonstrates qobject_from_json() now sets an error on lexical
  errors, but still doesn't on some other errors.

* tests/libqtest.c's qmp_response(): the Error object is now reliable,
  so use it to improve the error message.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-40-armbru@redhat.com>
---
 include/qapi/qmp/qerror.h |  3 ---
 monitor.c                 |  5 +----
 qga/main.c                |  3 ++-
 qobject/json-lexer.c      |  3 +--
 qobject/json-streamer.c   | 22 ++++++++++++++++------
 tests/check-qjson.c       | 15 ++++++++-------
 tests/libqtest.c          |  7 +++++--
 7 files changed, 33 insertions(+), 25 deletions(-)

(limited to 'tests/check-qjson.c')

diff --git a/include/qapi/qmp/qerror.h b/include/qapi/qmp/qerror.h
index c82360f429..145571f618 100644
--- a/include/qapi/qmp/qerror.h
+++ b/include/qapi/qmp/qerror.h
@@ -61,9 +61,6 @@
 #define QERR_IO_ERROR \
     "An IO error has occurred"
 
-#define QERR_JSON_PARSING \
-    "Invalid JSON syntax"
-
 #define QERR_MIGRATION_ACTIVE \
     "There's a migration process in progress"
 
diff --git a/monitor.c b/monitor.c
index 08f799a7bb..3dbdcb5190 100644
--- a/monitor.c
+++ b/monitor.c
@@ -4262,10 +4262,7 @@ static void handle_qmp_command(void *opaque, QObject *req, Error *err)
     QDict *qdict;
     QMPRequest *req_obj;
 
-    if (!req && !err) {
-        /* json_parser_parse() sucks: can fail without setting @err */
-        error_setg(&err, QERR_JSON_PARSING);
-    }
+    assert(!req != !err);
 
     qdict = qobject_to(QDict, req);
     if (qdict) {
diff --git a/qga/main.c b/qga/main.c
index 2fc49d00d8..b74e1241ef 100644
--- a/qga/main.c
+++ b/qga/main.c
@@ -603,12 +603,13 @@ static void process_event(void *opaque, QObject *obj, Error *err)
     int ret;
 
     g_debug("process_event: called");
+    assert(!obj != !err);
     if (err) {
         goto err;
     }
     req = qobject_to(QDict, obj);
     if (!req) {
-        error_setg(&err, QERR_JSON_PARSING);
+        error_setg(&err, "Input must be a JSON object");
         goto err;
     }
     if (!qdict_haskey(req, "execute")) {
diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c
index 96fe13621d..7c31c2c8ff 100644
--- a/qobject/json-lexer.c
+++ b/qobject/json-lexer.c
@@ -334,8 +334,7 @@ static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush)
             /* XXX: To avoid having previous bad input leaving the parser in an
              * unresponsive state where we consume unpredictable amounts of
              * subsequent "good" input, percolate this error state up to the
-             * tokenizer/parser by forcing a NULL object to be emitted, then
-             * reset state.
+             * parser by emitting a JSON_ERROR token, then reset lexer state.
              *
              * Also note that this handling is required for reliable channel
              * negotiation between QMP and the guest agent, since chr(0xFF)
diff --git a/qobject/json-streamer.c b/qobject/json-streamer.c
index a373e0114a..e372ecc895 100644
--- a/qobject/json-streamer.c
+++ b/qobject/json-streamer.c
@@ -13,6 +13,7 @@
 
 #include "qemu/osdep.h"
 #include "qemu-common.h"
+#include "qapi/error.h"
 #include "qapi/qmp/json-lexer.h"
 #include "qapi/qmp/json-parser.h"
 #include "qapi/qmp/json-streamer.h"
@@ -57,6 +58,7 @@ void json_message_process_token(JSONLexer *lexer, GString *input,
         parser->bracket_count--;
         break;
     case JSON_ERROR:
+        error_setg(&err, "JSON parse error, stray '%s'", input->str);
         goto out_emit;
     default:
         break;
@@ -82,12 +84,20 @@ void json_message_process_token(JSONLexer *lexer, GString *input,
         goto out_emit;
     }
 
-    if (parser->token_size > MAX_TOKEN_SIZE ||
-               g_queue_get_length(parser->tokens) > MAX_TOKEN_COUNT ||
-               parser->bracket_count + parser->brace_count > MAX_NESTING) {
-        /* Security consideration, we limit total memory allocated per object
-         * and the maximum recursion depth that a message can force.
-         */
+    /*
+     * Security consideration, we limit total memory allocated per object
+     * and the maximum recursion depth that a message can force.
+     */
+    if (parser->token_size > MAX_TOKEN_SIZE) {
+        error_setg(&err, "JSON token size limit exceeded");
+        goto out_emit;
+    }
+    if (g_queue_get_length(parser->tokens) > MAX_TOKEN_COUNT) {
+        error_setg(&err, "JSON token count limit exceeded");
+        goto out_emit;
+    }
+    if (parser->bracket_count + parser->brace_count > MAX_NESTING) {
+        error_setg(&err, "JSON nesting depth limit exceeded");
         goto out_emit;
     }
 
diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index 604886a1a2..d6fda0786f 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -1021,6 +1021,7 @@ static void interpolation_unknown(void)
     }
     g_test_trap_subprocess(NULL, 0, 0);
     g_test_trap_assert_failed();
+    g_test_trap_assert_stderr("*Unexpected error*stray '%x'*");
 }
 
 static void interpolation_string(void)
@@ -1296,11 +1297,11 @@ static void junk_input(void)
     QObject *obj;
 
     obj = qobject_from_json("@", &err);
-    g_assert(!err);             /* BUG */
+    error_free_or_abort(&err);
     g_assert(obj == NULL);
 
     obj = qobject_from_json("{\x01", &err);
-    g_assert(!err);             /* BUG */
+    error_free_or_abort(&err);
     g_assert(obj == NULL);
 
     obj = qobject_from_json("[0\xFF]", &err);
@@ -1308,11 +1309,11 @@ static void junk_input(void)
     g_assert(obj == NULL);
 
     obj = qobject_from_json("00", &err);
-    g_assert(!err);             /* BUG */
+    error_free_or_abort(&err);
     g_assert(obj == NULL);
 
     obj = qobject_from_json("[1e", &err);
-    g_assert(!err);             /* BUG */
+    error_free_or_abort(&err);
     g_assert(obj == NULL);
 
     obj = qobject_from_json("truer", &err);
@@ -1324,7 +1325,7 @@ static void unterminated_string(void)
 {
     Error *err = NULL;
     QObject *obj = qobject_from_json("\"abc", &err);
-    g_assert(!err);             /* BUG */
+    error_free_or_abort(&err);
     g_assert(obj == NULL);
 }
 
@@ -1332,7 +1333,7 @@ static void unterminated_sq_string(void)
 {
     Error *err = NULL;
     QObject *obj = qobject_from_json("'abc", &err);
-    g_assert(!err);             /* BUG */
+    error_free_or_abort(&err);
     g_assert(obj == NULL);
 }
 
@@ -1340,7 +1341,7 @@ static void unterminated_escape(void)
 {
     Error *err = NULL;
     QObject *obj = qobject_from_json("\"abc\\\"", &err);
-    g_assert(!err);             /* BUG */
+    error_free_or_abort(&err);
     g_assert(obj == NULL);
 }
 
diff --git a/tests/libqtest.c b/tests/libqtest.c
index 1f3b0cb1b1..5973a67652 100644
--- a/tests/libqtest.c
+++ b/tests/libqtest.c
@@ -450,8 +450,11 @@ static void qmp_response(void *opaque, QObject *obj, Error *err)
 {
     QMPResponseParser *qmp = opaque;
 
-    if (!obj) {
-        fprintf(stderr, "QMP JSON response parsing failed\n");
+    assert(!obj != !err);
+
+    if (err) {
+        error_prepend(&err, "QMP JSON response parsing failed: ");
+        error_report_err(err);
         abort();
     }
 
-- 
cgit v1.2.3-55-g7522


From f7617d45d4652ae10d38bd0c917d7488d155cccb Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Thu, 23 Aug 2018 18:40:07 +0200
Subject: json: Leave rejecting invalid interpolation to parser

Both lexer and parser reject invalid interpolation specifications.
The parser's check is useless.

The lexer ends the token right after the first bad character.  This
tends to lead to suboptimal error reporting.  For instance, input

    [ %04d ]

produces the tokens

    JSON_LSQUARE  [
    JSON_ERROR    %0
    JSON_INTEGER  4
    JSON_KEYWORD  d
    JSON_RSQUARE  ]

The parser then yields an error, an object and two more errors:

    error: Invalid JSON syntax
    object: 4
    error: JSON parse error, invalid keyword
    error: JSON parse error, expecting value

Dumb down the lexer to accept [A-Za-z0-9]*.  The parser's check is now
used.  Emit a proper error there.

The lexer now produces

    JSON_LSQUARE  [
    JSON_INTERP   %04d
    JSON_RSQUARE  ]

and the parser reports just

    JSON parse error, invalid interpolation '%04d'

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-41-armbru@redhat.com>
---
 qobject/json-lexer.c  | 44 ++++++--------------------------------------
 qobject/json-parser.c |  1 +
 tests/check-qjson.c   |  3 ++-
 3 files changed, 9 insertions(+), 39 deletions(-)

(limited to 'tests/check-qjson.c')

diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c
index 7c31c2c8ff..f1a4b5a430 100644
--- a/qobject/json-lexer.c
+++ b/qobject/json-lexer.c
@@ -93,7 +93,8 @@
  *   (apostrophe) instead of %x22 (quotation mark), and can't contain
  *   unescaped apostrophe, but can contain unescaped quotation mark.
  * - Interpolation, if enabled:
- *   interpolation = %((l|ll|I64)[du]|[ipsf])
+ *   The lexer accepts %[A-Za-z0-9]*, and leaves rejecting invalid
+ *   ones to the parser.
  *
  * Note:
  * - Input must be encoded in modified UTF-8.
@@ -116,11 +117,6 @@ enum json_lexer_state {
     IN_NEG_NONZERO_NUMBER,
     IN_KEYWORD,
     IN_INTERP,
-    IN_INTERP_L,
-    IN_INTERP_LL,
-    IN_INTERP_I,
-    IN_INTERP_I6,
-    IN_INTERP_I64,
     IN_WHITESPACE,
     IN_START,
     IN_START_INTERP,            /* must be IN_START + 1 */
@@ -224,39 +220,11 @@ static const uint8_t json_lexer[][256] =  {
     },
 
     /* interpolation */
-    [IN_INTERP_LL] = {
-        ['d'] = JSON_INTERP,
-        ['u'] = JSON_INTERP,
-    },
-
-    [IN_INTERP_L] = {
-        ['d'] = JSON_INTERP,
-        ['l'] = IN_INTERP_LL,
-        ['u'] = JSON_INTERP,
-    },
-
-    [IN_INTERP_I64] = {
-        ['d'] = JSON_INTERP,
-        ['u'] = JSON_INTERP,
-    },
-
-    [IN_INTERP_I6] = {
-        ['4'] = IN_INTERP_I64,
-    },
-
-    [IN_INTERP_I] = {
-        ['6'] = IN_INTERP_I6,
-    },
-
     [IN_INTERP] = {
-        ['d'] = JSON_INTERP,
-        ['i'] = JSON_INTERP,
-        ['p'] = JSON_INTERP,
-        ['s'] = JSON_INTERP,
-        ['u'] = JSON_INTERP,
-        ['f'] = JSON_INTERP,
-        ['l'] = IN_INTERP_L,
-        ['I'] = IN_INTERP_I,
+        TERMINAL(JSON_INTERP),
+        ['A' ... 'Z'] = IN_INTERP,
+        ['a' ... 'z'] = IN_INTERP,
+        ['0' ... '9'] = IN_INTERP,
     },
 
     /*
diff --git a/qobject/json-parser.c b/qobject/json-parser.c
index 2855eaaeca..e61cee9e8a 100644
--- a/qobject/json-parser.c
+++ b/qobject/json-parser.c
@@ -453,6 +453,7 @@ static QObject *parse_interpolation(JSONParserContext *ctxt, va_list *ap)
     } else if (!strcmp(token->str, "%f")) {
         return QOBJECT(qnum_from_double(va_arg(*ap, double)));
     }
+    parse_error(ctxt, token, "invalid interpolation '%s'", token->str);
     return NULL;
 }
 
diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index d6fda0786f..83f8a0e6e3 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -1021,7 +1021,8 @@ static void interpolation_unknown(void)
     }
     g_test_trap_subprocess(NULL, 0, 0);
     g_test_trap_assert_failed();
-    g_test_trap_assert_stderr("*Unexpected error*stray '%x'*");
+    g_test_trap_assert_stderr("*Unexpected error*"
+                              "invalid interpolation '%x'*");
 }
 
 static void interpolation_string(void)
-- 
cgit v1.2.3-55-g7522


From 53a0d616fecab09870411573afc58fd24ffb8648 Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Thu, 23 Aug 2018 18:40:08 +0200
Subject: json: Replace %I64d, %I64u by %PRId64, %PRIu64

Support for %I64d got added in commit 2c0d4b36e7f "json: fix PRId64 on
Win32".  We had to hard-code I64d because we used the lexer's finite
state machine to check interpolations.  No more, so clean this up.

Additional conversion specifications would be easy enough to implement
when needed.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-42-armbru@redhat.com>
---
 qobject/json-parser.c | 10 ++++++----
 tests/check-qjson.c   | 10 ++++++++++
 2 files changed, 16 insertions(+), 4 deletions(-)

(limited to 'tests/check-qjson.c')

diff --git a/qobject/json-parser.c b/qobject/json-parser.c
index e61cee9e8a..27e873ad3b 100644
--- a/qobject/json-parser.c
+++ b/qobject/json-parser.c
@@ -438,16 +438,18 @@ static QObject *parse_interpolation(JSONParserContext *ctxt, va_list *ap)
         return QOBJECT(qnum_from_int(va_arg(*ap, int)));
     } else if (!strcmp(token->str, "%ld")) {
         return QOBJECT(qnum_from_int(va_arg(*ap, long)));
-    } else if (!strcmp(token->str, "%lld") ||
-               !strcmp(token->str, "%I64d")) {
+    } else if (!strcmp(token->str, "%lld")) {
         return QOBJECT(qnum_from_int(va_arg(*ap, long long)));
+    } else if (!strcmp(token->str, "%" PRId64)) {
+        return QOBJECT(qnum_from_int(va_arg(*ap, int64_t)));
     } else if (!strcmp(token->str, "%u")) {
         return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned int)));
     } else if (!strcmp(token->str, "%lu")) {
         return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned long)));
-    } else if (!strcmp(token->str, "%llu") ||
-               !strcmp(token->str, "%I64u")) {
+    } else if (!strcmp(token->str, "%llu")) {
         return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned long long)));
+    } else if (!strcmp(token->str, "%" PRIu64)) {
+        return QOBJECT(qnum_from_uint(va_arg(*ap, uint64_t)));
     } else if (!strcmp(token->str, "%s")) {
         return QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
     } else if (!strcmp(token->str, "%f")) {
diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index 83f8a0e6e3..f344ad921c 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -945,9 +945,11 @@ static void keyword_literal(void)
 static void interpolation_valid(void)
 {
     long long value_lld = 0x123456789abcdefLL;
+    int64_t value_d64 = value_lld;
     long value_ld = (long)value_lld;
     int value_d = (int)value_lld;
     unsigned long long value_llu = 0xfedcba9876543210ULL;
+    uint64_t value_u64 = value_llu;
     unsigned long value_lu = (unsigned long)value_llu;
     unsigned value_u = (unsigned)value_llu;
     double value_f = 2.323423423;
@@ -985,6 +987,10 @@ static void interpolation_valid(void)
     g_assert_cmpint(qnum_get_int(qnum), ==, value_lld);
     qobject_unref(qnum);
 
+    qnum = qobject_to(QNum, qobject_from_jsonf_nofail("%" PRId64, value_d64));
+    g_assert_cmpint(qnum_get_int(qnum), ==, value_lld);
+    qobject_unref(qnum);
+
     qnum = qobject_to(QNum, qobject_from_jsonf_nofail("%u", value_u));
     g_assert_cmpuint(qnum_get_uint(qnum), ==, value_u);
     qobject_unref(qnum);
@@ -997,6 +1003,10 @@ static void interpolation_valid(void)
     g_assert_cmpuint(qnum_get_uint(qnum), ==, value_llu);
     qobject_unref(qnum);
 
+    qnum = qobject_to(QNum, qobject_from_jsonf_nofail("%" PRIu64, value_u64));
+    g_assert_cmpuint(qnum_get_uint(qnum), ==, value_llu);
+    qobject_unref(qnum);
+
     qnum = qobject_to(QNum, qobject_from_jsonf_nofail("%f", value_f));
     g_assert(qnum_get_double(qnum) == value_f);
     qobject_unref(qnum);
-- 
cgit v1.2.3-55-g7522


From 2a4794ba146d6560bd77ca840ff6908f81d585f4 Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Thu, 23 Aug 2018 18:40:10 +0200
Subject: qjson: Fix qobject_from_json() & friends for multiple values

qobject_from_json() & friends use the consume_json() callback to
receive either a value or an error from the parser.

When they are fed a string that contains more than either one JSON
value or one JSON syntax error, consume_json() gets called multiple
times.

When the last call receives a value, qobject_from_json() returns that
value.  Any other values are leaked.

When any call receives an error, qobject_from_json() sets the first
error received.  Any other errors are thrown away.

When values follow errors, qobject_from_json() returns both a value
and sets an error.  That's bad.  Impact:

* block.c's parse_json_protocol() ignores and leaks the value.  It's
  used to to parse pseudo-filenames starting with "json:".  The
  pseudo-filenames can come from the user or from image meta-data such
  as a QCOW2 image's backing file name.

* vl.c's parse_display_qapi() ignores and leaks the error.  It's used
  to parse the argument of command line option -display.

* vl.c's main() case QEMU_OPTION_blockdev ignores the error and leaves
  it in @err.  main() will then pass a pointer to a non-null Error *
  to net_init_clients(), which is forbidden.  It can lead to assertion
  failure or other misbehavior.

* check-qjson.c's multiple_values() demonstrates the badness.

* The other callers are not affected since they only pass strings with
  exactly one JSON value or, in the case of negative tests, one
  error.

The impact on the _nofail() functions is relatively harmless.  They
abort when any call receives an error.  Else they return the last
value, and leak the others, if any.

Fix consume_json() as follows.  On the first call, save value and
error as before.  On subsequent calls, if any, don't save them.  If
the first call saved a value, the next call, if any, replaces the
value by an "Expecting at most one JSON value" error.  Take care not
to leak values or errors that aren't saved.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-44-armbru@redhat.com>
---
 qobject/qjson.c     | 15 ++++++++++++++-
 tests/check-qjson.c | 10 +++-------
 2 files changed, 17 insertions(+), 8 deletions(-)

(limited to 'tests/check-qjson.c')

diff --git a/qobject/qjson.c b/qobject/qjson.c
index 7395556069..7f69036487 100644
--- a/qobject/qjson.c
+++ b/qobject/qjson.c
@@ -33,8 +33,21 @@ static void consume_json(void *opaque, QObject *json, Error *err)
 {
     JSONParsingState *s = opaque;
 
+    assert(!json != !err);
+    assert(!s->result || !s->err);
+
+    if (s->result) {
+        qobject_unref(s->result);
+        s->result = NULL;
+        error_setg(&s->err, "Expecting at most one JSON value");
+    }
+    if (s->err) {
+        qobject_unref(json);
+        error_free(err);
+        return;
+    }
     s->result = json;
-    error_propagate(&s->err, err);
+    s->err = err;
 }
 
 /*
diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index f344ad921c..f9438370d9 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -1443,17 +1443,13 @@ static void multiple_values(void)
     Error *err = NULL;
     QObject *obj;
 
-    /* BUG this leaks the syntax tree for "false" */
     obj = qobject_from_json("false true", &err);
-    g_assert(qbool_get_bool(qobject_to(QBool, obj)));
-    g_assert(!err);
-    qobject_unref(obj);
+    error_free_or_abort(&err);
+    g_assert(obj == NULL);
 
-    /* BUG simultaneously succeeds and fails */
     obj = qobject_from_json("} true", &err);
-    g_assert(qbool_get_bool(qobject_to(QBool, obj)));
     error_free_or_abort(&err);
-    qobject_unref(obj);
+    g_assert(obj == NULL);
 }
 
 int main(int argc, char **argv)
-- 
cgit v1.2.3-55-g7522


From f9277915ee7b2654f5347c4c261c8a0651fdd561 Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Thu, 23 Aug 2018 18:40:12 +0200
Subject: json: Fix streamer not to ignore trailing unterminated structures

json_message_process_token() accumulates tokens until it has the
sequence of tokens that comprise a single JSON value (it counts curly
braces and square brackets to decide).  It feeds those token sequences
to json_parser_parse().  If a non-empty sequence of tokens remains at
the end of the parse, it's silently ignored.  check-qjson.c cases
unterminated_array(), unterminated_array_comma(), unterminated_dict(),
unterminated_dict_comma() demonstrate this bug.

Fix as follows.  Introduce a JSON_END_OF_INPUT token.  When the
streamer receives it, it feeds the accumulated tokens to
json_parser_parse().

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-46-armbru@redhat.com>
---
 include/qapi/qmp/json-lexer.h | 1 +
 qobject/json-lexer.c          | 2 ++
 qobject/json-streamer.c       | 8 ++++++++
 tests/check-qjson.c           | 8 ++++----
 4 files changed, 15 insertions(+), 4 deletions(-)

(limited to 'tests/check-qjson.c')

diff --git a/include/qapi/qmp/json-lexer.h b/include/qapi/qmp/json-lexer.h
index afa84cb910..508fc7bdaf 100644
--- a/include/qapi/qmp/json-lexer.h
+++ b/include/qapi/qmp/json-lexer.h
@@ -30,6 +30,7 @@ typedef enum json_token_type {
     JSON_INTERP,
     JSON_SKIP,
     JSON_ERROR,
+    JSON_END_OF_INPUT,
 } JSONTokenType;
 
 typedef struct JSONLexer {
diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c
index 01417dca9d..a728c32faa 100644
--- a/qobject/json-lexer.c
+++ b/qobject/json-lexer.c
@@ -347,6 +347,8 @@ void json_lexer_flush(JSONLexer *lexer)
     if (lexer->state != lexer->start_state) {
         json_lexer_feed_char(lexer, 0, true);
     }
+    json_message_process_token(lexer, lexer->token, JSON_END_OF_INPUT,
+                               lexer->x, lexer->y);
 }
 
 void json_lexer_destroy(JSONLexer *lexer)
diff --git a/qobject/json-streamer.c b/qobject/json-streamer.c
index e372ecc895..674dfe6e85 100644
--- a/qobject/json-streamer.c
+++ b/qobject/json-streamer.c
@@ -60,6 +60,13 @@ void json_message_process_token(JSONLexer *lexer, GString *input,
     case JSON_ERROR:
         error_setg(&err, "JSON parse error, stray '%s'", input->str);
         goto out_emit;
+    case JSON_END_OF_INPUT:
+        if (g_queue_is_empty(parser->tokens)) {
+            return;
+        }
+        json = json_parser_parse(parser->tokens, parser->ap, &err);
+        parser->tokens = NULL;
+        goto out_emit;
     default:
         break;
     }
@@ -137,6 +144,7 @@ void json_message_parser_feed(JSONMessageParser *parser,
 void json_message_parser_flush(JSONMessageParser *parser)
 {
     json_lexer_flush(&parser->lexer);
+    assert(g_queue_is_empty(parser->tokens));
 }
 
 void json_message_parser_destroy(JSONMessageParser *parser)
diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index f9438370d9..0ca4b3c823 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -1360,7 +1360,7 @@ static void unterminated_array(void)
 {
     Error *err = NULL;
     QObject *obj = qobject_from_json("[32", &err);
-    g_assert(!err);             /* BUG */
+    error_free_or_abort(&err);
     g_assert(obj == NULL);
 }
 
@@ -1368,7 +1368,7 @@ static void unterminated_array_comma(void)
 {
     Error *err = NULL;
     QObject *obj = qobject_from_json("[32,", &err);
-    g_assert(!err);             /* BUG */
+    error_free_or_abort(&err);
     g_assert(obj == NULL);
 }
 
@@ -1384,7 +1384,7 @@ static void unterminated_dict(void)
 {
     Error *err = NULL;
     QObject *obj = qobject_from_json("{'abc':32", &err);
-    g_assert(!err);             /* BUG */
+    error_free_or_abort(&err);
     g_assert(obj == NULL);
 }
 
@@ -1392,7 +1392,7 @@ static void unterminated_dict_comma(void)
 {
     Error *err = NULL;
     QObject *obj = qobject_from_json("{'abc':32,", &err);
-    g_assert(!err);             /* BUG */
+    error_free_or_abort(&err);
     g_assert(obj == NULL);
 }
 
-- 
cgit v1.2.3-55-g7522


From dd98e8481992741a6b5ec0bdfcee05c1c8f602d6 Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Thu, 23 Aug 2018 18:40:14 +0200
Subject: qjson: Have qobject_from_json() & friends reject empty and blank

The last case where qobject_from_json() & friends return null without
setting an error is empty or blank input.  Callers:

* block.c's parse_json_protocol() reports "Could not parse the JSON
  options".  It's marked as a work-around, because it also covered
  actual bugs, but they got fixed in the previous few commits.

* qobject_input_visitor_new_str() reports "JSON parse error".  Also
  marked as work-around.  The recent fixes have made this unreachable,
  because it currently gets called only for input starting with '{'.

* check-qjson.c's empty_input() and blank_input() demonstrate the
  behavior.

* The other callers are not affected since they only pass input with
  exactly one JSON value or, in the case of negative tests, one error.

Fail with "Expecting a JSON value" instead of returning null, and
simplify callers.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-48-armbru@redhat.com>
---
 block.c                      |  5 -----
 qapi/qobject-input-visitor.c |  5 -----
 qobject/qjson.c              |  4 ++++
 tests/check-qjson.c          | 12 ++++++++++--
 4 files changed, 14 insertions(+), 12 deletions(-)

(limited to 'tests/check-qjson.c')

diff --git a/block.c b/block.c
index 6161dbe3eb..0dbb1fcc7b 100644
--- a/block.c
+++ b/block.c
@@ -1478,11 +1478,6 @@ static QDict *parse_json_filename(const char *filename, Error **errp)
 
     options_obj = qobject_from_json(filename, errp);
     if (!options_obj) {
-        /* Work around qobject_from_json() lossage TODO fix that */
-        if (errp && !*errp) {
-            error_setg(errp, "Could not parse the JSON options");
-            return NULL;
-        }
         error_prepend(errp, "Could not parse the JSON options: ");
         return NULL;
     }
diff --git a/qapi/qobject-input-visitor.c b/qapi/qobject-input-visitor.c
index da57f4cc24..3e88b27f9e 100644
--- a/qapi/qobject-input-visitor.c
+++ b/qapi/qobject-input-visitor.c
@@ -725,11 +725,6 @@ Visitor *qobject_input_visitor_new_str(const char *str,
     if (is_json) {
         obj = qobject_from_json(str, errp);
         if (!obj) {
-            /* Work around qobject_from_json() lossage TODO fix that */
-            if (errp && !*errp) {
-                error_setg(errp, "JSON parse error");
-                return NULL;
-            }
             return NULL;
         }
         args = qobject_to(QDict, obj);
diff --git a/qobject/qjson.c b/qobject/qjson.c
index 7f69036487..b9ccae2c2a 100644
--- a/qobject/qjson.c
+++ b/qobject/qjson.c
@@ -70,6 +70,10 @@ static QObject *qobject_from_jsonv(const char *string, va_list *ap,
     json_message_parser_flush(&state.parser);
     json_message_parser_destroy(&state.parser);
 
+    if (!state.result && !state.err) {
+        error_setg(&state.err, "Expecting a JSON value");
+    }
+
     error_propagate(errp, state.err);
     return state.result;
 }
diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index 0ca4b3c823..936258ddd4 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -1291,13 +1291,21 @@ static void simple_interpolation(void)
 
 static void empty_input(void)
 {
-    QObject *obj = qobject_from_json("", &error_abort);
+    Error *err = NULL;
+    QObject *obj;
+
+    obj = qobject_from_json("", &err);
+    error_free_or_abort(&err);
     g_assert(obj == NULL);
 }
 
 static void blank_input(void)
 {
-    QObject *obj = qobject_from_json("\n ", &error_abort);
+    Error *err = NULL;
+    QObject *obj;
+
+    obj = qobject_from_json("\n ", &err);
+    error_free_or_abort(&err);
     g_assert(obj == NULL);
 }
 
-- 
cgit v1.2.3-55-g7522


From 16a485992112be1c8b47b58b0124357db9037093 Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Thu, 23 Aug 2018 18:40:23 +0200
Subject: json: Improve safety of qobject_from_jsonf_nofail() & friends

The JSON parser optionally supports interpolation.  This is used to
build QObjects by parsing string templates.  The templates are C
literals, so parse errors (such as invalid interpolation
specifications) are actually programming errors.  Consequently, the
functions providing parsing with interpolation
(qobject_from_jsonf_nofail(), qobject_from_vjsonf_nofail(),
qdict_from_jsonf_nofail(), qdict_from_vjsonf_nofail()) pass
&error_abort to the parser.

However, there's another, more dangerous kind of programming error:
since we use va_arg() to get the value to interpolate, behavior is
undefined when the variable argument isn't consistent with the
interpolation specification.

The same problem exists with printf()-like functions, and the solution
is to have the compiler check consistency.  This is what
GCC_FMT_ATTR() is about.

To enable this type checking for interpolation as well, we carefully
chose our interpolation specifications to match printf conversion
specifications, and decorate functions parsing templates with
GCC_FMT_ATTR().

Note that this only protects against undefined behavior due to type
errors.  It can't protect against use of invalid interpolation
specifications that happen to be valid printf conversion
specifications.

However, there's still a gaping hole in the type checking: GCC
recognizes '%' as start of printf conversion specification anywhere in
the template, but the parser recognizes it only outside JSON strings.
For instance, if someone were to pass a "{ '%s': %d }" template, GCC
would require a char * and an int argument, but the parser would
va_arg() only an int argument, resulting in undefined behavior.

Avoid undefined behavior by catching the programming error at run
time: have the parser recognize and reject '%' in JSON strings.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-57-armbru@redhat.com>
---
 qobject/json-parser.c | 12 ++++++++++--
 tests/check-qjson.c   | 17 +++++++----------
 2 files changed, 17 insertions(+), 12 deletions(-)

(limited to 'tests/check-qjson.c')

diff --git a/qobject/json-parser.c b/qobject/json-parser.c
index 273f448a52..63e9229f1c 100644
--- a/qobject/json-parser.c
+++ b/qobject/json-parser.c
@@ -144,7 +144,8 @@ static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
 
     while (*ptr != quote) {
         assert(*ptr);
-        if (*ptr == '\\') {
+        switch (*ptr) {
+        case '\\':
             beg = ptr++;
             switch (*ptr++) {
             case '"':
@@ -205,7 +206,14 @@ static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
                 parse_error(ctxt, token, "invalid escape sequence in string");
                 goto out;
             }
-        } else {
+            break;
+        case '%':
+            if (ctxt->ap) {
+                parse_error(ctxt, token, "can't interpolate into string");
+                goto out;
+            }
+            /* fall through */
+        default:
             cp = mod_utf8_codepoint(ptr, 6, &end);
             if (cp < 0) {
                 parse_error(ctxt, token, "invalid UTF-8 sequence in string");
diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index 936258ddd4..a1854573de 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -1037,16 +1037,13 @@ static void interpolation_unknown(void)
 
 static void interpolation_string(void)
 {
-    QLitObject decoded = QLIT_QLIST(((QLitObject[]){
-            QLIT_QSTR("%s"),
-            QLIT_QSTR("eins"),
-            {}}));
-    QObject *qobj;
-
-    /* Dangerous misfeature: % is silently ignored in strings */
-    qobj = qobject_from_jsonf_nofail("['%s', %s]", "eins", "zwei");
-    g_assert(qlit_equal_qobject(&decoded, qobj));
-    qobject_unref(qobj);
+    if (g_test_subprocess()) {
+        qobject_from_jsonf_nofail("['%s', %s]", "eins", "zwei");
+    }
+    g_test_trap_subprocess(NULL, 0, 0);
+    g_test_trap_assert_failed();
+    g_test_trap_assert_stderr("*Unexpected error*"
+                              "can't interpolate into string*");
 }
 
 static void simple_dict(void)
-- 
cgit v1.2.3-55-g7522


From 8bca4613e6cddd948895b8db3def05950463495b Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Thu, 23 Aug 2018 18:40:24 +0200
Subject: json: Support %% in JSON strings when interpolating

The previous commit makes JSON strings containing '%' awkward to
express in templates: you'd have to mask the '%' with a Unicode
escape \u0025.  No template currently contains such JSON strings.
Support the printf conversion specification %% in JSON strings as a
convenience anyway, because it's trivially easy to do.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-58-armbru@redhat.com>
---
 qobject/json-parser.c | 3 ++-
 tests/check-qjson.c   | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'tests/check-qjson.c')

diff --git a/qobject/json-parser.c b/qobject/json-parser.c
index 63e9229f1c..3318b8dad0 100644
--- a/qobject/json-parser.c
+++ b/qobject/json-parser.c
@@ -208,10 +208,11 @@ static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
             }
             break;
         case '%':
-            if (ctxt->ap) {
+            if (ctxt->ap && ptr[1] != '%') {
                 parse_error(ctxt, token, "can't interpolate into string");
                 goto out;
             }
+            ptr++;
             /* fall through */
         default:
             cp = mod_utf8_codepoint(ptr, 6, &end);
diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index a1854573de..cc13f3d41e 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -1270,7 +1270,7 @@ static void simple_interpolation(void)
     QObject *obj;
     QLitObject decoded = QLIT_QLIST(((QLitObject[]){
             QLIT_QNUM(1),
-            QLIT_QNUM(2),
+            QLIT_QSTR("100%"),
             QLIT_QLIST(((QLitObject[]){
                         QLIT_QNUM(32),
                         QLIT_QNUM(42),
@@ -1280,7 +1280,7 @@ static void simple_interpolation(void)
     embedded_obj = qobject_from_json("[32, 42]", &error_abort);
     g_assert(embedded_obj != NULL);
 
-    obj = qobject_from_jsonf_nofail("[%d, 2, %p]", 1, embedded_obj);
+    obj = qobject_from_jsonf_nofail("[%d, '100%%', %p]", 1, embedded_obj);
     g_assert(qlit_equal_qobject(&decoded, obj));
 
     qobject_unref(obj);
-- 
cgit v1.2.3-55-g7522