diff options
-rw-r--r-- | include/mbsalign.h | 3 | ||||
-rw-r--r-- | lib/mbsalign.c | 80 |
2 files changed, 83 insertions, 0 deletions
/* ---- include/mbsalign.h: public declarations -------------------------- */

extern char *mbs_safe_encode(const char *s, size_t *width);
extern char *mbs_safe_encode_to_buffer(const char *s, size_t *width, char *buf, const char *safechars);
extern size_t mbs_safe_encode_size(size_t bytes);

extern char *mbs_invalid_encode(const char *s, size_t *width);
extern char *mbs_invalid_encode_to_buffer(const char *s, size_t *width, char *buf);

/* ---- lib/mbsalign.c ---------------------------------------------------- */

/*
 * NOTE: mbs_safe_encode_to_buffer() ends immediately before this point; its
 * body is not visible in this excerpt and is therefore not reproduced here.
 */

/*
 * Copy @s to @buf and replace every byte that is not part of a valid
 * multibyte sequence (and is not printable in the current locale) with a
 * four character "\x??" hex sequence. Valid multibyte sequences are copied
 * unchanged.
 *
 * @s:     NUL-terminated input string
 * @width: optional; on success receives the number of terminal cells
 *         occupied by the result (may be NULL if the caller does not care)
 * @buf:   output buffer; has to be big enough to store
 *         mbs_safe_encode_size(strlen(s)) bytes
 *
 * Returns @buf on success, or NULL if @s is NULL/empty or @buf is NULL
 * (@width is left untouched in that case).
 *
 * Without HAVE_WIDECHAR the string is copied byte by byte and every byte
 * counts as one cell.
 */
char *mbs_invalid_encode_to_buffer(const char *s, size_t *width, char *buf)
{
	const char *p = s;
	char *r = buf;
	size_t cells = 0;
	size_t sz = s ? strlen(s) : 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;

	memset(&st, 0, sizeof(st));
#endif
	if (!sz || !buf)
		return NULL;

	while (*p) {
#ifdef HAVE_WIDECHAR
		wchar_t wc;
		size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);

		if (len == 0)
			break;		/* end of string */

		if (len == (size_t) -1 || len == (size_t) -2) {
			unsigned char c = (unsigned char) *p;

			/*
			 * After an encoding error the conversion state is
			 * unspecified, and after an incomplete sequence it
			 * holds a partial character. We resynchronize on the
			 * next byte, so start again from the initial state.
			 */
			memset(&st, 0, sizeof(st));
			len = 1;

			/*
			 * Not valid multibyte sequence -- maybe it's
			 * printable char according to the current locales.
			 */
			if (!isprint(c)) {
				static const char hex[] = "0123456789abcdef";

				*r++ = '\\';
				*r++ = 'x';
				*r++ = hex[c >> 4];
				*r++ = hex[c & 0x0f];
				cells += 4;
			} else {
				*r++ = *p;
				cells++;
			}
		} else {
			int cw = wcwidth(wc);

			memcpy(r, p, len);
			r += len;

			/*
			 * wcwidth() returns -1 for non-printable characters;
			 * adding that to an unsigned counter would wrap it,
			 * so such characters contribute no cells.
			 */
			if (cw > 0)
				cells += (size_t) cw;
		}
		p += len;
#else
		*r++ = *p++;
		cells++;
#endif
	}

	*r = '\0';
	if (width)
		*width = cells;
	return buf;
}

/*
 * Returns the buffer size needed to encode @bytes input bytes in the worst
 * case: every byte expands to a four character "\x??" sequence, plus the
 * terminating NUL.
 */
size_t mbs_safe_encode_size(size_t bytes)
{
	return (bytes * 4) + 1;
}

/*
 * NOTE: mbs_safe_encode() sits between mbs_safe_encode_size() and
 * mbs_invalid_encode() in lib/mbsalign.c; only its last lines are visible in
 * this excerpt, so it is not reproduced here.
 */

/*
 * Returns allocated string where all broken multibyte sequences are
 * replaced with \x?? hex sequence. See mbs_invalid_encode_to_buffer()
 * for the meaning of @width.
 *
 * Returns NULL if @s is NULL or empty, if the required buffer size cannot
 * be represented in size_t, or if the allocation fails. The caller owns
 * the returned string and has to free() it.
 */
char *mbs_invalid_encode(const char *s, size_t *width)
{
	size_t sz = s ? strlen(s) : 0;
	char *buf, *ret;

	if (!sz)
		return NULL;

	/* mbs_safe_encode_size() computes sz * 4 + 1; refuse to overflow */
	if (sz > ((size_t) -1 - 1) / 4)
		return NULL;

	buf = malloc(mbs_safe_encode_size(sz));
	if (!buf)
		return NULL;

	ret = mbs_invalid_encode_to_buffer(s, width, buf);
	if (!ret)
		free(buf);
	return ret;
}

/*
 * NOTE: a HAVE_WIDECHAR-only "static bool" helper begins right after this
 * point in lib/mbsalign.c; it is outside this excerpt.
 */