diff options
| author | Peter Maydell | 2018-08-25 11:11:54 +0200 |
|---|---|---|
| committer | Peter Maydell | 2018-08-25 11:11:54 +0200 |
| commit | cc9821fa9ac43728a7ece0a5e42e0147e6aadbf4 (patch) | |
| tree | cdeb23078fab646764f9c0ee38bca585d8be0f89 /util/unicode.c | |
| parent | Merge remote-tracking branch 'remotes/amarkovic/tags/mips-queue-aug-2018' int... (diff) | |
| parent | json: Update references to RFC 7159 to RFC 8259 (diff) | |
| download | qemu-cc9821fa9ac43728a7ece0a5e42e0147e6aadbf4.tar.gz qemu-cc9821fa9ac43728a7ece0a5e42e0147e6aadbf4.tar.xz qemu-cc9821fa9ac43728a7ece0a5e42e0147e6aadbf4.zip | |
Merge remote-tracking branch 'remotes/armbru/tags/pull-qobject-2018-08-24' into staging
QObject patches for 2018-08-24
# gpg: Signature made Fri 24 Aug 2018 20:28:53 BST
# gpg: using RSA key 3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg: aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867 4E5F 3870 B400 EB91 8653
* remotes/armbru/tags/pull-qobject-2018-08-24: (58 commits)
json: Update references to RFC 7159 to RFC 8259
json: Support %% in JSON strings when interpolating
json: Improve safety of qobject_from_jsonf_nofail() & friends
json: Keep interpolation state in JSONParserContext
tests/drive_del-test: Fix harmless JSON interpolation bug
json: Clean up headers
qobject: Drop superfluous includes of qemu-common.h
json: Make JSONToken opaque outside json-parser.c
json: Unbox tokens queue in JSONMessageParser
json: Streamline json_message_process_token()
json: Enforce token count and size limits more tightly
qjson: Have qobject_from_json() & friends reject empty and blank
json: Assert json_parser_parse() consumes all tokens on success
json: Fix streamer not to ignore trailing unterminated structures
json: Fix latent parser aborts at end of input
qjson: Fix qobject_from_json() & friends for multiple values
json: Improve names of lexer states related to numbers
json: Replace %I64d, %I64u by %PRId64, %PRIu64
json: Leave rejecting invalid interpolation to parser
json: Pass lexical errors and limit violations to callback
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'util/unicode.c')
| -rw-r--r-- | util/unicode.c | 69 |
1 files changed, 62 insertions, 7 deletions
diff --git a/util/unicode.c b/util/unicode.c index a812a35171..8580bc598b 100644 --- a/util/unicode.c +++ b/util/unicode.c @@ -13,6 +13,21 @@ #include "qemu/osdep.h" #include "qemu/unicode.h" +static bool is_valid_codepoint(int codepoint) +{ + if (codepoint > 0x10FFFFu) { + return false; /* beyond Unicode range */ + } + if ((codepoint >= 0xFDD0 && codepoint <= 0xFDEF) + || (codepoint & 0xFFFE) == 0xFFFE) { + return false; /* noncharacter */ + } + if (codepoint >= 0xD800 && codepoint <= 0xDFFF) { + return false; /* surrogate code point */ + } + return true; +} + /** * mod_utf8_codepoint: * @s: string encoded in modified UTF-8 @@ -83,13 +98,8 @@ int mod_utf8_codepoint(const char *s, size_t n, char **end) cp <<= 6; cp |= byte & 0x3F; } - if (cp > 0x10FFFF) { - cp = -1; /* beyond Unicode range */ - } else if ((cp >= 0xFDD0 && cp <= 0xFDEF) - || (cp & 0xFFFE) == 0xFFFE) { - cp = -1; /* noncharacter */ - } else if (cp >= 0xD800 && cp <= 0xDFFF) { - cp = -1; /* surrogate code point */ + if (!is_valid_codepoint(cp)) { + cp = -1; } else if (cp < min_cp[len - 2] && !(cp == 0 && len == 2)) { cp = -1; /* overlong, not \xC0\x80 */ } @@ -99,3 +109,48 @@ out: *end = (char *)p; return cp; } + +/** + * mod_utf8_encode: + * @buf: Destination buffer + * @bufsz: size of @buf, at least 5. + * @codepoint: Unicode codepoint to encode + * + * Convert Unicode codepoint @codepoint to modified UTF-8. + * + * Returns: the length of the UTF-8 sequence on success, -1 when + * @codepoint is invalid. + */ +ssize_t mod_utf8_encode(char buf[], size_t bufsz, int codepoint) +{ + assert(bufsz >= 5); + + if (!is_valid_codepoint(codepoint)) { + return -1; + } + + if (codepoint > 0 && codepoint <= 0x7F) { + buf[0] = codepoint & 0x7F; + buf[1] = 0; + return 1; + } + if (codepoint <= 0x7FF) { + buf[0] = 0xC0 | ((codepoint >> 6) & 0x1F); + buf[1] = 0x80 | (codepoint & 0x3F); + buf[2] = 0; + return 2; + } + if (codepoint <= 0xFFFF) { + buf[0] = 0xE0 | ((codepoint >> 12) & 0x0F); + buf[1] = 0x80 | ((codepoint >> 6) & 0x3F); + buf[2] = 0x80 | (codepoint & 0x3F); + buf[3] = 0; + return 3; + } + buf[0] = 0xF0 | ((codepoint >> 18) & 0x07); + buf[1] = 0x80 | ((codepoint >> 12) & 0x3F); + buf[2] = 0x80 | ((codepoint >> 6) & 0x3F); + buf[3] = 0x80 | (codepoint & 0x3F); + buf[4] = 0; + return 4; +} |
