qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

qdist.c (9469B)


      1 /*
      2  * qdist.c - QEMU helpers for handling frequency distributions of data.
      3  *
      4  * Copyright (C) 2016, Emilio G. Cota <cota@braap.org>
      5  *
      6  * License: GNU GPL, version 2 or later.
      7  *   See the COPYING file in the top-level directory.
      8  */
      9 #include "qemu/osdep.h"
     10 #include "qemu/qdist.h"
     11 
     12 #include <math.h>
     13 #ifndef NAN
     14 #define NAN (0.0 / 0.0)
     15 #endif
     16 
     17 #define QDIST_EMPTY_STR "(empty)"
     18 
     19 void qdist_init(struct qdist *dist)
     20 {
     21     dist->entries = g_new(struct qdist_entry, 1);
     22     dist->size = 1;
     23     dist->n = 0;
     24 }
     25 
     26 void qdist_destroy(struct qdist *dist)
     27 {
     28     g_free(dist->entries);
     29 }
     30 
     31 static inline int qdist_cmp_double(double a, double b)
     32 {
     33     if (a > b) {
     34         return 1;
     35     } else if (a < b) {
     36         return -1;
     37     }
     38     return 0;
     39 }
     40 
     41 static int qdist_cmp(const void *ap, const void *bp)
     42 {
     43     const struct qdist_entry *a = ap;
     44     const struct qdist_entry *b = bp;
     45 
     46     return qdist_cmp_double(a->x, b->x);
     47 }
     48 
     49 void qdist_add(struct qdist *dist, double x, long count)
     50 {
     51     struct qdist_entry *entry = NULL;
     52 
     53     if (dist->n) {
     54         struct qdist_entry e;
     55 
     56         e.x = x;
     57         entry = bsearch(&e, dist->entries, dist->n, sizeof(e), qdist_cmp);
     58     }
     59 
     60     if (entry) {
     61         entry->count += count;
     62         return;
     63     }
     64 
     65     if (unlikely(dist->n == dist->size)) {
     66         dist->size *= 2;
     67         dist->entries = g_renew(struct qdist_entry, dist->entries, dist->size);
     68     }
     69     dist->n++;
     70     entry = &dist->entries[dist->n - 1];
     71     entry->x = x;
     72     entry->count = count;
     73     qsort(dist->entries, dist->n, sizeof(*entry), qdist_cmp);
     74 }
     75 
     76 void qdist_inc(struct qdist *dist, double x)
     77 {
     78     qdist_add(dist, x, 1);
     79 }
     80 
     81 /*
     82  * Unicode for block elements. See:
     83  *   https://en.wikipedia.org/wiki/Block_Elements
     84  */
     85 static const gunichar qdist_blocks[] = {
     86     0x2581,
     87     0x2582,
     88     0x2583,
     89     0x2584,
     90     0x2585,
     91     0x2586,
     92     0x2587,
     93     0x2588
     94 };
     95 
     96 #define QDIST_NR_BLOCK_CODES ARRAY_SIZE(qdist_blocks)
     97 
     98 /*
     99  * Print a distribution into a string.
    100  *
    101  * This function assumes that appropriate binning has been done on the input;
    102  * see qdist_bin__internal() and qdist_pr_plain().
    103  *
    104  * Callers must free the returned string with g_free().
    105  */
    106 static char *qdist_pr_internal(const struct qdist *dist)
    107 {
    108     double min, max;
    109     GString *s = g_string_new("");
    110     size_t i;
    111 
    112     /* if only one entry, its printout will be either full or empty */
    113     if (dist->n == 1) {
    114         if (dist->entries[0].count) {
    115             g_string_append_unichar(s, qdist_blocks[QDIST_NR_BLOCK_CODES - 1]);
    116         } else {
    117             g_string_append_c(s, ' ');
    118         }
    119         goto out;
    120     }
    121 
    122     /* get min and max counts */
    123     min = dist->entries[0].count;
    124     max = min;
    125     for (i = 0; i < dist->n; i++) {
    126         struct qdist_entry *e = &dist->entries[i];
    127 
    128         if (e->count < min) {
    129             min = e->count;
    130         }
    131         if (e->count > max) {
    132             max = e->count;
    133         }
    134     }
    135 
    136     for (i = 0; i < dist->n; i++) {
    137         struct qdist_entry *e = &dist->entries[i];
    138         int index;
    139 
    140         /* make an exception with 0; instead of using block[0], print a space */
    141         if (e->count) {
    142             /* divide first to avoid loss of precision when e->count == max */
    143             index = (e->count - min) / (max - min) * (QDIST_NR_BLOCK_CODES - 1);
    144             g_string_append_unichar(s, qdist_blocks[index]);
    145         } else {
    146             g_string_append_c(s, ' ');
    147         }
    148     }
    149  out:
    150     return g_string_free(s, FALSE);
    151 }
    152 
    153 /*
    154  * Bin the distribution in @from into @n bins of consecutive, non-overlapping
    155  * intervals, copying the result to @to.
    156  *
    157  * This function is internal to qdist: only this file and test code should
    158  * ever call it.
    159  *
    160  * Note: calling this function on an already-binned qdist is a bug.
    161  *
    162  * If @n == 0 or @from->n == 1, use @from->n.
    163  */
    164 void qdist_bin__internal(struct qdist *to, const struct qdist *from, size_t n)
    165 {
    166     double xmin, xmax;
    167     double step;
    168     size_t i, j;
    169 
    170     qdist_init(to);
    171 
    172     if (from->n == 0) {
    173         return;
    174     }
    175     if (n == 0 || from->n == 1) {
    176         n = from->n;
    177     }
    178 
    179     /* set equally-sized bins between @from's left and right */
    180     xmin = qdist_xmin(from);
    181     xmax = qdist_xmax(from);
    182     step = (xmax - xmin) / n;
    183 
    184     if (n == from->n) {
    185         /* if @from's entries are equally spaced, no need to re-bin */
    186         for (i = 0; i < from->n; i++) {
    187             if (from->entries[i].x != xmin + i * step) {
    188                 goto rebin;
    189             }
    190         }
    191         /* they're equally spaced, so copy the dist and bail out */
    192         to->entries = g_renew(struct qdist_entry, to->entries, n);
    193         to->n = from->n;
    194         memcpy(to->entries, from->entries, sizeof(*to->entries) * to->n);
    195         return;
    196     }
    197 
    198  rebin:
    199     j = 0;
    200     for (i = 0; i < n; i++) {
    201         double x;
    202         double left, right;
    203 
    204         left = xmin + i * step;
    205         right = xmin + (i + 1) * step;
    206 
    207         /* Add x, even if it might not get any counts later */
    208         x = left;
    209         qdist_add(to, x, 0);
    210 
    211         /*
    212          * To avoid double-counting we capture [left, right) ranges, except for
    213          * the righmost bin, which captures a [left, right] range.
    214          */
    215         while (j < from->n && (from->entries[j].x < right || i == n - 1)) {
    216             struct qdist_entry *o = &from->entries[j];
    217 
    218             qdist_add(to, x, o->count);
    219             j++;
    220         }
    221     }
    222 }
    223 
    224 /*
    225  * Print @dist into a string, after re-binning it into @n bins of consecutive,
    226  * non-overlapping intervals.
    227  *
    228  * If @n == 0, use @orig->n.
    229  *
    230  * Callers must free the returned string with g_free().
    231  */
    232 char *qdist_pr_plain(const struct qdist *dist, size_t n)
    233 {
    234     struct qdist binned;
    235     char *ret;
    236 
    237     if (dist->n == 0) {
    238         return g_strdup(QDIST_EMPTY_STR);
    239     }
    240     qdist_bin__internal(&binned, dist, n);
    241     ret = qdist_pr_internal(&binned);
    242     qdist_destroy(&binned);
    243     return ret;
    244 }
    245 
    246 static char *qdist_pr_label(const struct qdist *dist, size_t n_bins,
    247                             uint32_t opt, bool is_left)
    248 {
    249     const char *percent;
    250     const char *lparen;
    251     const char *rparen;
    252     GString *s;
    253     double x1, x2, step;
    254     double x;
    255     double n;
    256     int dec;
    257 
    258     s = g_string_new("");
    259     if (!(opt & QDIST_PR_LABELS)) {
    260         goto out;
    261     }
    262 
    263     dec = opt & QDIST_PR_NODECIMAL ? 0 : 1;
    264     percent = opt & QDIST_PR_PERCENT ? "%" : "";
    265 
    266     n = n_bins ? n_bins : dist->n;
    267     x = is_left ? qdist_xmin(dist) : qdist_xmax(dist);
    268     step = (qdist_xmax(dist) - qdist_xmin(dist)) / n;
    269 
    270     if (opt & QDIST_PR_100X) {
    271         x *= 100.0;
    272         step *= 100.0;
    273     }
    274     if (opt & QDIST_PR_NOBINRANGE) {
    275         lparen = rparen = "";
    276         x1 = x;
    277         x2 = x; /* unnecessary, but a dumb compiler might not figure it out */
    278     } else {
    279         lparen = "[";
    280         rparen = is_left ? ")" : "]";
    281         if (is_left) {
    282             x1 = x;
    283             x2 = x + step;
    284         } else {
    285             x1 = x - step;
    286             x2 = x;
    287         }
    288     }
    289     g_string_append_printf(s, "%s%.*f", lparen, dec, x1);
    290     if (!(opt & QDIST_PR_NOBINRANGE)) {
    291         g_string_append_printf(s, ",%.*f%s", dec, x2, rparen);
    292     }
    293     g_string_append(s, percent);
    294  out:
    295     return g_string_free(s, FALSE);
    296 }
    297 
    298 /*
    299  * Print the distribution's histogram into a string.
    300  *
    301  * See also: qdist_pr_plain().
    302  *
    303  * Callers must free the returned string with g_free().
    304  */
    305 char *qdist_pr(const struct qdist *dist, size_t n_bins, uint32_t opt)
    306 {
    307     const char *border = opt & QDIST_PR_BORDER ? "|" : "";
    308     char *llabel, *rlabel;
    309     char *hgram;
    310     GString *s;
    311 
    312     if (dist->n == 0) {
    313         return g_strdup(QDIST_EMPTY_STR);
    314     }
    315 
    316     s = g_string_new("");
    317 
    318     llabel = qdist_pr_label(dist, n_bins, opt, true);
    319     rlabel = qdist_pr_label(dist, n_bins, opt, false);
    320     hgram = qdist_pr_plain(dist, n_bins);
    321     g_string_append_printf(s, "%s%s%s%s%s",
    322                            llabel, border, hgram, border, rlabel);
    323     g_free(llabel);
    324     g_free(rlabel);
    325     g_free(hgram);
    326 
    327     return g_string_free(s, FALSE);
    328 }
    329 
    330 static inline double qdist_x(const struct qdist *dist, int index)
    331 {
    332     if (dist->n == 0) {
    333         return NAN;
    334     }
    335     return dist->entries[index].x;
    336 }
    337 
    338 double qdist_xmin(const struct qdist *dist)
    339 {
    340     return qdist_x(dist, 0);
    341 }
    342 
    343 double qdist_xmax(const struct qdist *dist)
    344 {
    345     return qdist_x(dist, dist->n - 1);
    346 }
    347 
    348 size_t qdist_unique_entries(const struct qdist *dist)
    349 {
    350     return dist->n;
    351 }
    352 
    353 unsigned long qdist_sample_count(const struct qdist *dist)
    354 {
    355     unsigned long count = 0;
    356     size_t i;
    357 
    358     for (i = 0; i < dist->n; i++) {
    359         struct qdist_entry *e = &dist->entries[i];
    360 
    361         count += e->count;
    362     }
    363     return count;
    364 }
    365 
    366 static double qdist_pairwise_avg(const struct qdist *dist, size_t index,
    367                                  size_t n, unsigned long count)
    368 {
    369     /* amortize the recursion by using a base case > 2 */
    370     if (n <= 8) {
    371         size_t i;
    372         double ret = 0;
    373 
    374         for (i = 0; i < n; i++) {
    375             struct qdist_entry *e = &dist->entries[index + i];
    376 
    377             ret += e->x * e->count / count;
    378         }
    379         return ret;
    380     } else {
    381         size_t n2 = n / 2;
    382 
    383         return qdist_pairwise_avg(dist, index, n2, count) +
    384                qdist_pairwise_avg(dist, index + n2, n - n2, count);
    385     }
    386 }
    387 
    388 double qdist_avg(const struct qdist *dist)
    389 {
    390     unsigned long count;
    391 
    392     count = qdist_sample_count(dist);
    393     if (!count) {
    394         return NAN;
    395     }
    396     return qdist_pairwise_avg(dist, 0, dist->n, count);
    397 }