summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/mbsalign.h3
-rw-r--r--lib/mbsalign.c80
2 files changed, 83 insertions, 0 deletions
diff --git a/include/mbsalign.h b/include/mbsalign.h
index 6bdb50d3f..0c28e6f69 100644
--- a/include/mbsalign.h
+++ b/include/mbsalign.h
@@ -57,4 +57,7 @@ extern char *mbs_safe_encode(const char *s, size_t *width);
extern char *mbs_safe_encode_to_buffer(const char *s, size_t *width, char *buf, const char *safechars);
extern size_t mbs_safe_encode_size(size_t bytes);
+extern char *mbs_invalid_encode(const char *s, size_t *width);
+extern char *mbs_invalid_encode_to_buffer(const char *s, size_t *width, char *buf);
+
#endif /* UTIL_LINUX_MBSALIGN_H */
diff --git a/lib/mbsalign.c b/lib/mbsalign.c
index b0e1004a0..78ab12a0d 100644
--- a/lib/mbsalign.c
+++ b/lib/mbsalign.c
@@ -194,6 +194,67 @@ char *mbs_safe_encode_to_buffer(const char *s, size_t *width, char *buf, const c
return buf;
}
+/*
+ * Copy @s to @buf and replace broken sequences to \x?? hex sequence. The
+ * @width returns number of cells. The @safechars are not encoded.
+ *
+ * The @buf has to be big enough to store mbs_safe_encode_size(strlen(s)))
+ * bytes.
+ */
+char *mbs_invalid_encode_to_buffer(const char *s, size_t *width, char *buf)
+{
+ const char *p = s;
+ char *r;
+ size_t sz = s ? strlen(s) : 0;
+
+#ifdef HAVE_WIDECHAR
+ mbstate_t st;
+ memset(&st, 0, sizeof(st));
+#endif
+ if (!sz || !buf)
+ return NULL;
+
+ r = buf;
+ *width = 0;
+
+ while (p && *p) {
+#ifdef HAVE_WIDECHAR
+ wchar_t wc;
+ size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);
+
+ if (len == 0)
+ break; /* end of string */
+
+ if (len == (size_t) -1 || len == (size_t) -2) {
+ len = 1;
+ /*
+ * Not valid multibyte sequence -- maybe it's
+ * printable char according to the current locales.
+ */
+ if (!isprint((unsigned char) *p)) {
+ sprintf(r, "\\x%02x", (unsigned char) *p);
+ r += 4;
+ *width += 4;
+ } else {
+ (*width)++;
+ *r++ = *p;
+ }
+ } else {
+ memcpy(r, p, len);
+ r += len;
+ *width += wcwidth(wc);
+ }
+ p += len;
+#else
+ *r++ = *p++;
+ (*width)++;
+#endif
+ }
+
+ *r = '\0';
+ return buf;
+}
+
size_t mbs_safe_encode_size(size_t bytes)
{
return (bytes * 4) + 1;
@@ -218,6 +279,25 @@ char *mbs_safe_encode(const char *s, size_t *width)
return ret;
}
+/*
+ * Returns allocated string where all broken widechars chars are
+ * replaced with \x?? hex sequence.
+ */
+char *mbs_invalid_encode(const char *s, size_t *width)
+{
+ size_t sz = s ? strlen(s) : 0;
+ char *buf, *ret = NULL;
+
+ if (!sz)
+ return NULL;
+ buf = malloc(mbs_safe_encode_size(sz));
+ if (buf)
+ ret = mbs_invalid_encode_to_buffer(s, width, buf);
+ if (!ret)
+ free(buf);
+ return ret;
+}
+
#ifdef HAVE_WIDECHAR
static bool