/* * qdist.c - QEMU helpers for handling frequency distributions of data. * * Copyright (C) 2016, Emilio G. Cota <cota@braap.org> * * License: GNU GPL, version 2 or later. * See the COPYING file in the top-level directory. */ #include "qemu/osdep.h" #include "qemu/qdist.h" #include <math.h> #ifndef NAN #define NAN (0.0 / 0.0) #endif #define QDIST_EMPTY_STR "(empty)" void qdist_init(struct qdist *dist) { dist->entries = g_new(struct qdist_entry, 1); dist->size = 1; dist->n = 0; } void qdist_destroy(struct qdist *dist) { g_free(dist->entries); } static inline int qdist_cmp_double(double a, double b) { if (a > b) { return 1; } else if (a < b) { return -1; } return 0; } static int qdist_cmp(const void *ap, const void *bp) { const struct qdist_entry *a = ap; const struct qdist_entry *b = bp; return qdist_cmp_double(a->x, b->x); } void qdist_add(struct qdist *dist, double x, long count) { struct qdist_entry *entry = NULL; if (dist->n) { struct qdist_entry e; e.x = x; entry = bsearch(&e, dist->entries, dist->n, sizeof(e), qdist_cmp); } if (entry) { entry->count += count; return; } if (unlikely(dist->n == dist->size)) { dist->size *= 2; dist->entries = g_renew(struct qdist_entry, dist->entries, dist->size); } dist->n++; entry = &dist->entries[dist->n - 1]; entry->x = x; entry->count = count; qsort(dist->entries, dist->n, sizeof(*entry), qdist_cmp); } void qdist_inc(struct qdist *dist, double x) { qdist_add(dist, x, 1); } /* * Unicode for block elements. See: * https://en.wikipedia.org/wiki/Block_Elements */ static const gunichar qdist_blocks[] = { 0x2581, 0x2582, 0x2583, 0x2584, 0x2585, 0x2586, 0x2587, 0x2588 }; #define QDIST_NR_BLOCK_CODES ARRAY_SIZE(qdist_blocks) /* * Print a distribution into a string. * * This function assumes that appropriate binning has been done on the input; * see qdist_bin__internal() and qdist_pr_plain(). * * Callers must free the returned string with g_free(). */ static char *qdist_pr_internal(const struct qdist *dist) { double min, max; GString *s = g_string_new(""); size_t i; /* if only one entry, its printout will be either full or empty */ if (dist->n == 1) { if (dist->entries[0].count) { g_string_append_unichar(s, qdist_blocks[QDIST_NR_BLOCK_CODES - 1]); } else { g_string_append_c(s, ' '); } goto out; } /* get min and max counts */ min = dist->entries[0].count; max = min; for (i = 0; i < dist->n; i++) { struct qdist_entry *e = &dist->entries[i]; if (e->count < min) { min = e->count; } if (e->count > max) { max = e->count; } } for (i = 0; i < dist->n; i++) { struct qdist_entry *e = &dist->entries[i]; int index; /* make an exception with 0; instead of using block[0], print a space */ if (e->count) { /* divide first to avoid loss of precision when e->count == max */ index = (e->count - min) / (max - min) * (QDIST_NR_BLOCK_CODES - 1); g_string_append_unichar(s, qdist_blocks[index]); } else { g_string_append_c(s, ' '); } } out: return g_string_free(s, FALSE); } /* * Bin the distribution in @from into @n bins of consecutive, non-overlapping * intervals, copying the result to @to. * * This function is internal to qdist: only this file and test code should * ever call it. * * Note: calling this function on an already-binned qdist is a bug. * * If @n == 0 or @from->n == 1, use @from->n. */ void qdist_bin__internal(struct qdist *to, const struct qdist *from, size_t n) { double xmin, xmax; double step; size_t i, j; qdist_init(to); if (from->n == 0) { return; } if (n == 0 || from->n == 1) { n = from->n; } /* set equally-sized bins between @from's left and right */ xmin = qdist_xmin(from); xmax = qdist_xmax(from); step = (xmax - xmin) / n; if (n == from->n) { /* if @from's entries are equally spaced, no need to re-bin */ for (i = 0; i < from->n; i++) { if (from->entries[i].x != xmin + i * step) { goto rebin; } } /* they're equally spaced, so copy the dist and bail out */ to->entries = g_renew(struct qdist_entry, to->entries, n); to->n = from->n; memcpy(to->entries, from->entries, sizeof(*to->entries) * to->n); return; } rebin: j = 0; for (i = 0; i < n; i++) { double x; double left, right; left = xmin + i * step; right = xmin + (i + 1) * step; /* Add x, even if it might not get any counts later */ x = left; qdist_add(to, x, 0); /* * To avoid double-counting we capture [left, right) ranges, except for * the righmost bin, which captures a [left, right] range. */ while (j < from->n && (from->entries[j].x < right || i == n - 1)) { struct qdist_entry *o = &from->entries[j]; qdist_add(to, x, o->count); j++; } } } /* * Print @dist into a string, after re-binning it into @n bins of consecutive, * non-overlapping intervals. * * If @n == 0, use @orig->n. * * Callers must free the returned string with g_free(). */ char *qdist_pr_plain(const struct qdist *dist, size_t n) { struct qdist binned; char *ret; if (dist->n == 0) { return g_strdup(QDIST_EMPTY_STR); } qdist_bin__internal(&binned, dist, n); ret = qdist_pr_internal(&binned); qdist_destroy(&binned); return ret; } static char *qdist_pr_label(const struct qdist *dist, size_t n_bins, uint32_t opt, bool is_left) { const char *percent; const char *lparen; const char *rparen; GString *s; double x1, x2, step; double x; double n; int dec; s = g_string_new(""); if (!(opt & QDIST_PR_LABELS)) { goto out; } dec = opt & QDIST_PR_NODECIMAL ? 0 : 1; percent = opt & QDIST_PR_PERCENT ? "%" : ""; n = n_bins ? n_bins : dist->n; x = is_left ? qdist_xmin(dist) : qdist_xmax(dist); step = (qdist_xmax(dist) - qdist_xmin(dist)) / n; if (opt & QDIST_PR_100X) { x *= 100.0; step *= 100.0; } if (opt & QDIST_PR_NOBINRANGE) { lparen = rparen = ""; x1 = x; x2 = x; /* unnecessary, but a dumb compiler might not figure it out */ } else { lparen = "["; rparen = is_left ? ")" : "]"; if (is_left) { x1 = x; x2 = x + step; } else { x1 = x - step; x2 = x; } } g_string_append_printf(s, "%s%.*f", lparen, dec, x1); if (!(opt & QDIST_PR_NOBINRANGE)) { g_string_append_printf(s, ",%.*f%s", dec, x2, rparen); } g_string_append(s, percent); out: return g_string_free(s, FALSE); } /* * Print the distribution's histogram into a string. * * See also: qdist_pr_plain(). * * Callers must free the returned string with g_free(). */ char *qdist_pr(const struct qdist *dist, size_t n_bins, uint32_t opt) { const char *border = opt & QDIST_PR_BORDER ? "|" : ""; char *llabel, *rlabel; char *hgram; GString *s; if (dist->n == 0) { return g_strdup(QDIST_EMPTY_STR); } s = g_string_new(""); llabel = qdist_pr_label(dist, n_bins, opt, true); rlabel = qdist_pr_label(dist, n_bins, opt, false); hgram = qdist_pr_plain(dist, n_bins); g_string_append_printf(s, "%s%s%s%s%s", llabel, border, hgram, border, rlabel); g_free(llabel); g_free(rlabel); g_free(hgram); return g_string_free(s, FALSE); } static inline double qdist_x(const struct qdist *dist, int index) { if (dist->n == 0) { return NAN; } return dist->entries[index].x; } double qdist_xmin(const struct qdist *dist) { return qdist_x(dist, 0); } double qdist_xmax(const struct qdist *dist) { return qdist_x(dist, dist->n - 1); } size_t qdist_unique_entries(const struct qdist *dist) { return dist->n; } unsigned long qdist_sample_count(const struct qdist *dist) { unsigned long count = 0; size_t i; for (i = 0; i < dist->n; i++) { struct qdist_entry *e = &dist->entries[i]; count += e->count; } return count; } static double qdist_pairwise_avg(const struct qdist *dist, size_t index, size_t n, unsigned long count) { /* amortize the recursion by using a base case > 2 */ if (n <= 8) { size_t i; double ret = 0; for (i = 0; i < n; i++) { struct qdist_entry *e = &dist->entries[index + i]; ret += e->x * e->count / count; } return ret; } else { size_t n2 = n / 2; return qdist_pairwise_avg(dist, index, n2, count) + qdist_pairwise_avg(dist, index + n2, n - n2, count); } } double qdist_avg(const struct qdist *dist) { unsigned long count; count = qdist_sample_count(dist); if (!count) { return NAN; } return qdist_pairwise_avg(dist, 0, dist->n, count); }