summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorKarel Zak2012-08-06 12:50:09 +0200
committerKarel Zak2012-08-06 12:50:09 +0200
commitfce849325c6409e36617c580bccafe1464803aa8 (patch)
tree08e972def9e091fab3c019c93da34214e37a705a /lib
parentmount: replace control chars in mountpoint name (diff)
downloadkernel-qcow2-util-linux-fce849325c6409e36617c580bccafe1464803aa8.tar.gz
kernel-qcow2-util-linux-fce849325c6409e36617c580bccafe1464803aa8.tar.xz
kernel-qcow2-util-linux-fce849325c6409e36617c580bccafe1464803aa8.zip
lib/tt: count read cells, improve \x?? hex encoding
* use wcwidth() to count real number of columns required for multibyte strings * encode control characters with \x?? in raw and export (NAME=data) outputs * use \x?? for controls and non-printable characters in the default outputs * use \x?? to encode already existing hex sequences, for example /mnt/ugly\x20space ---> /mnt/ugly\x5cx20space this is not used in the default output, but in raw/export outputs only (which is designed for scripts). Signed-off-by: Karel Zak <kzak@redhat.com>
Diffstat (limited to 'lib')
-rw-r--r--lib/tt.c175
1 files changed, 161 insertions, 14 deletions
diff --git a/lib/tt.c b/lib/tt.c
index 6018b50be..51b8c2e07 100644
--- a/lib/tt.c
+++ b/lib/tt.c
@@ -37,8 +37,6 @@ static const struct tt_symbols ascii_tt_symbols = {
};
#ifdef HAVE_WIDECHAR
-#define mbs_width(_s) mbstowcs(NULL, _s, 0)
-
#define UTF_V "\342\224\202" /* U+2502, Vertical line drawing char */
#define UTF_VR "\342\224\234" /* U+251C, Vertical and right */
#define UTF_H "\342\224\200" /* U+2500, Horizontal */
@@ -49,15 +47,133 @@ static const struct tt_symbols utf8_tt_symbols = {
.vert = UTF_V " ",
.right = UTF_UR UTF_H,
};
-
-#else /* !HAVE_WIDECHAR */
-# define mbs_width(_s) strlen(_s)
#endif /* !HAVE_WIDECHAR */
#define is_last_column(_tb, _cl) \
list_last_entry(&(_cl)->cl_columns, &(_tb)->tb_columns)
/*
+ * Counts number of cells in multibyte string. For all control and
+ * non-printable chars is the result width enlarged to store \x?? hex
+ * sequence. See mbs_safe_encode().
+ */
+static size_t mbs_safe_width(const char *s)
+{
+ mbstate_t st;
+ const char *p = s;
+ size_t width = 0;
+
+ memset(&st, 0, sizeof(st));
+
+ while (p && *p) {
+ if (iscntrl((unsigned char) *p)) {
+ width += 4; /* *p encoded to \x?? */
+ p++;
+ }
+#ifdef HAVE_WIDECHAR
+ else {
+ wchar_t wc;
+ size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);
+
+ if (len == 0)
+ break;
+ if (len == (size_t) -1 || len == (size_t) -2)
+ return (size_t) -1;
+
+ if (!iswprint(wc))
+ width += len * 4; /* hex encode whole sequence */
+ else
+ width += wcwidth(wc); /* number of cells */
+ p += len;
+ }
+#else
+ else if (!isprint((unsigned char) *p)) {
+ width += 4; /* *p encoded to \x?? */
+ p++;
+ } else {
+ width++;
+ p++;
+ }
+#endif
+ }
+
+ return width;
+}
+
+/*
+ * Returns allocated string where all control and non-printable chars are
+ * replaced with \x?? hex sequence.
+ */
+static char *mbs_safe_encode(const char *s, size_t *width)
+{
+ mbstate_t st;
+ const char *p = s;
+ char *res, *r;
+ size_t sz = s ? strlen(s) : 0;
+
+
+ if (!sz)
+ return NULL;
+
+ memset(&st, 0, sizeof(st));
+
+ res = malloc((sz * 4) + 1);
+ if (!res)
+ return NULL;
+
+ r = res;
+ *width = 0;
+
+ while (p && *p) {
+ if (iscntrl((unsigned char) *p)) {
+ sprintf(r, "\\x%02x", (unsigned char) *p);
+ r += 4;
+ *width += 4;
+ p++;
+ }
+#ifdef HAVE_WIDECHAR
+ else {
+ wchar_t wc;
+ size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);
+
+ if (len == 0)
+ break;
+ if (len == (size_t) -1 || len == (size_t) -2)
+ return NULL;
+
+ if (!iswprint(wc)) {
+ size_t i;
+ for (i = 0; i < len; i++) {
+ sprintf(r, "\\x%02x", (unsigned char) *p);
+ r += 4;
+ *width += 4;
+ }
+ } else {
+ memcpy(r, p, len);
+ r += len;
+ *width += wcwidth(wc);
+ }
+ p += len;
+ }
+#else
+ else if (!isprint((unsigned char) *p)) {
+ sprintf(r, "\\x%02x", (unsigned char) *p);
+ p++;
+ r += 4;
+ *width += 4;
+ } else {
+ *r++ = *p++;
+ *width++;
+ }
+#endif
+ }
+
+ *r = '\0';
+
+ return res;
+}
+
+/*
* @flags: TT_FL_* flags (usually TT_FL_{ASCII,RAW})
*
* Returns: newly allocated table
@@ -344,7 +460,10 @@ static void count_column_width(struct tt *tb, struct tt_column *cl,
list_for_each(lp, &tb->tb_lines) {
struct tt_line *ln = list_entry(lp, struct tt_line, ln_lines);
char *data = line_get_data(ln, cl, buf, bufsz);
- size_t len = data ? mbs_width(data) : 0;
+ size_t len = data ? mbs_safe_width(data) : 0;
+
+ if (len == (size_t) -1) /* ignore broken multibyte strings */
+ len = 0;
if (len > cl->width_max)
cl->width_max = len;
@@ -368,7 +487,7 @@ static void count_column_width(struct tt *tb, struct tt_column *cl,
/* check and set minimal column width */
if (cl->name)
- cl->width_min = mbs_width(cl->name);
+ cl->width_min = mbs_safe_width(cl->name);
/* enlarge to minimal width */
if (cl->width < cl->width_min && !(cl->flags & TT_FL_STRICTWIDTH))
@@ -533,9 +652,19 @@ void tt_fputs_quoted(const char *data, FILE *out)
fputc('"', out);
for (p = data; p && *p; p++) {
- if ((unsigned char) *p == 0x22 || !isprint((unsigned char) *p))
+ if ((unsigned char) *p == 0x22 ||
+ !isprint((unsigned char) *p) ||
+ iscntrl((unsigned char) *p)) {
+
fprintf(out, "\\x%02x", *p);
- else
+
+ } else if (*p == '\\' &&
+ *(p + 1) == 'x' &&
+ isxdigit((unsigned char) *(p + 2)) &&
+ isxdigit((unsigned char) *(p + 3))) {
+
+ fprintf(out, "\\x%02x", *p);
+ } else
fputc(*p, out);
}
fputc('"', out);
@@ -546,18 +675,31 @@ void tt_fputs_nonblank(const char *data, FILE *out)
const char *p;
for (p = data; p && *p; p++) {
- if (isblank((unsigned char) *p) || !isprint((unsigned char) *p))
+ if (isblank((unsigned char) *p) ||
+ !isprint((unsigned char) *p) ||
+ iscntrl((unsigned char) *p)) {
+
fprintf(out, "\\x%02x", *p);
- else
+
+ } else if (*p == '\\' &&
+ *(p + 1) == 'x' &&
+ isxdigit((unsigned char) *(p + 2)) &&
+ isxdigit((unsigned char) *(p + 3))) {
+
+ fprintf(out, "\\x%02x", *p);
+ } else
fputc(*p, out);
}
}
-/* note that this function modifies @data
+/*
+ * Prints data, data maybe be printed in more formats (raw, NAME=xxx pairs) and
+ * control and non-printable chars maybe encoded in \x?? hex encoding.
*/
static void print_data(struct tt *tb, struct tt_column *cl, char *data)
{
- size_t len, i, width;
+ size_t len = 0, i, width;
+ char *buf;
if (!data)
data = "";
@@ -580,7 +722,10 @@ static void print_data(struct tt *tb, struct tt_column *cl, char *data)
}
/* note that 'len' and 'width' are number of cells, not bytes */
- len = mbs_width(data);
+ buf = mbs_safe_encode(data, &len);
+ data = buf;
+ if (!data)
+ data = "";
if (!len || len == (size_t) -1) {
len = 0;
@@ -623,6 +768,8 @@ static void print_data(struct tt *tb, struct tt_column *cl, char *data)
} else
fputc(' ', stdout); /* columns separator */
}
+
+ free(buf);
}
static void print_line(struct tt_line *ln, char *buf, size_t bufsz)