qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

fp-bench.c (19958B)


      1 /*
      2  * fp-bench.c - A collection of simple floating point microbenchmarks.
      3  *
      4  * Copyright (C) 2018, Emilio G. Cota <cota@braap.org>
      5  *
      6  * License: GNU GPL, version 2 or later.
      7  *   See the COPYING file in the top-level directory.
      8  */
      9 #ifndef HW_POISON_H
     10 #error Must define HW_POISON_H to work around TARGET_* poisoning
     11 #endif
     12 
     13 #include "qemu/osdep.h"
     14 #include <math.h>
     15 #include <fenv.h>
     16 #include "qemu/timer.h"
     17 #include "qemu/int128.h"
     18 #include "fpu/softfloat.h"
     19 
     /*
      * Number of operations timed per batch. A large batch amortizes the cost
      * of computing fresh random inputs.
      */
     #define OPS_PER_ITER 50000

     /* Size of the operand arrays; the three-operand mulAdd needs all slots. */
     #define MAX_OPERANDS 3

     /* Initial 64-bit states of the per-operand random generators. */
     #define SEED_A UINT64_C(0xdeadfacedeadface)
     #define SEED_B UINT64_C(0xbadc0feebadc0fee)
     #define SEED_C UINT64_C(0xbeefdeadbeefdead)
     28 
     /* Floating point operations that can be benchmarked. */
     enum op {
         OP_ADD    = 0,
         OP_SUB    = 1,
         OP_MUL    = 2,
         OP_DIV    = 3,
         OP_FMA    = 4,
         OP_SQRT   = 5,
         OP_CMP    = 6,
         OP_MAX_NR = 7, /* number of operations, not an operation itself */
     };

     /*
      * Command-line name of each operation, indexed by enum op.
      * The OP_MAX_NR slot holds the NULL terminator, so the array can be
      * walked as a NULL-terminated string vector.
      */
     static const char * const op_names[] = {
         [OP_ADD]    = "add",
         [OP_SUB]    = "sub",
         [OP_MUL]    = "mul",
         [OP_DIV]    = "div",
         [OP_FMA]    = "mulAdd",
         [OP_SQRT]   = "sqrt",
         [OP_CMP]    = "cmp",
         [OP_MAX_NR] = NULL,
     };
     50 
     /*
      * Operand precision. The first three values are what -p selects; the
      * PREC_FLOAT* values are the softfloat counterparts that parse_args()
      * switches to when the soft tester is in use.
      */
     enum precision {
         PREC_SINGLE   = 0,
         PREC_DOUBLE   = 1,
         PREC_QUAD     = 2,
         PREC_FLOAT32  = 3,
         PREC_FLOAT64  = 4,
         PREC_FLOAT128 = 5,
         PREC_MAX_NR   = 6, /* number of precisions, not a precision itself */
     };
     60 
     /* Rounding modes selectable with -r. */
     enum rounding {
         ROUND_EVEN    = 0,
         ROUND_ZERO    = 1,
         ROUND_DOWN    = 2,
         ROUND_UP      = 3,
         ROUND_TIEAWAY = 4,
         N_ROUND_MODES = 5, /* number of modes, not a mode itself */
     };

     /*
      * Command-line name of each rounding mode, indexed by enum rounding.
      * Unlike the other name tables this one has no NULL terminator; it is
      * bounded by N_ROUND_MODES.
      */
     static const char * const round_names[] = {
         [ROUND_EVEN]    = "even",
         [ROUND_ZERO]    = "zero",
         [ROUND_DOWN]    = "down",
         [ROUND_UP]      = "up",
         [ROUND_TIEAWAY] = "tieaway",
     };
     77 
     /* Which implementation is benchmarked: softfloat or the host FPU. */
     enum tester {
         TESTER_SOFT   = 0,
         TESTER_HOST   = 1,
         TESTER_MAX_NR = 2, /* number of testers, not a tester itself */
     };

     /*
      * Command-line name of each tester, indexed by enum tester.
      * The TESTER_MAX_NR slot holds the NULL terminator.
      */
     static const char * const tester_names[] = {
         [TESTER_SOFT]   = "soft",
         [TESTER_HOST]   = "host",
         [TESTER_MAX_NR] = NULL,
     };
     89 
     90 union fp {
     91     float f;
     92     double d;
     93     float32 f32;
     94     float64 f64;
     95     float128 f128;
     96     uint64_t u64;
     97 };
     98 
     99 struct op_state;
    100 
    101 typedef float (*float_func_t)(const struct op_state *s);
    102 typedef double (*double_func_t)(const struct op_state *s);
    103 
    104 union fp_func {
    105     float_func_t float_func;
    106     double_func_t double_func;
    107 };
    108 
    109 typedef void (*bench_func_t)(void);
    110 
    111 struct op_desc {
    112     const char * const name;
    113 };
    114 
    115 #define DEFAULT_DURATION_SECS 1
    116 
    117 static uint64_t random_ops[MAX_OPERANDS] = {
    118     SEED_A, SEED_B, SEED_C,
    119 };
    120 
    121 static float128 random_quad_ops[MAX_OPERANDS] = {
    122     {SEED_A, SEED_B}, {SEED_B, SEED_C}, {SEED_C, SEED_A},
    123 };
    124 static float_status soft_status;
    125 static enum precision precision;
    126 static enum op operation;
    127 static enum tester tester;
    128 static uint64_t n_completed_ops;
    129 static unsigned int duration = DEFAULT_DURATION_SECS;
    130 static int64_t ns_elapsed;
    131 /* disable optimizations with volatile */
    132 static volatile union fp res;
    133 
    /*
     * One step of the xorshift64* generator, see
     * https://en.wikipedia.org/wiki/Xorshift
     *
     * Preferred over rand_r() because it is faster and yields a full 64-bit
     * value (RAND_MAX is only guaranteed to be >= INT_MAX).
     *
     * @x: current generator state
     * Returns the state after three xor-shift rounds, times a fixed odd
     * multiplier. A state of 0 maps to 0.
     */
    static uint64_t xorshift64star(uint64_t x)
    {
        const uint64_t multiplier = UINT64_C(2685821657736338717);
        uint64_t state = x;

        state = state ^ (state >> 12);
        state = state ^ (state << 25);
        state = state ^ (state >> 27);

        return state * multiplier;
    }
    146 
    147 static void update_random_ops(int n_ops, enum precision prec)
    148 {
    149     int i;
    150 
    151     for (i = 0; i < n_ops; i++) {
    152 
    153         switch (prec) {
    154         case PREC_SINGLE:
    155         case PREC_FLOAT32:
    156         {
    157             uint64_t r = random_ops[i];
    158             do {
    159                 r = xorshift64star(r);
    160             } while (!float32_is_normal(r));
    161             random_ops[i] = r;
    162             break;
    163         }
    164         case PREC_DOUBLE:
    165         case PREC_FLOAT64:
    166         {
    167             uint64_t r = random_ops[i];
    168             do {
    169                 r = xorshift64star(r);
    170             } while (!float64_is_normal(r));
    171             random_ops[i] = r;
    172             break;
    173         }
    174         case PREC_QUAD:
    175         case PREC_FLOAT128:
    176         {
    177             float128 r = random_quad_ops[i];
    178             uint64_t hi = r.high;
    179             uint64_t lo = r.low;
    180             do {
    181                 hi = xorshift64star(hi);
    182                 lo = xorshift64star(lo);
    183                 r = make_float128(hi, lo);
    184             } while (!float128_is_normal(r));
    185             random_quad_ops[i] = r;
    186             break;
    187         }
    188         default:
    189             g_assert_not_reached();
    190         }
    191     }
    192 }
    193 
    194 static void fill_random(union fp *ops, int n_ops, enum precision prec,
    195                         bool no_neg)
    196 {
    197     int i;
    198 
    199     for (i = 0; i < n_ops; i++) {
    200         switch (prec) {
    201         case PREC_SINGLE:
    202         case PREC_FLOAT32:
    203             ops[i].f32 = make_float32(random_ops[i]);
    204             if (no_neg && float32_is_neg(ops[i].f32)) {
    205                 ops[i].f32 = float32_chs(ops[i].f32);
    206             }
    207             break;
    208         case PREC_DOUBLE:
    209         case PREC_FLOAT64:
    210             ops[i].f64 = make_float64(random_ops[i]);
    211             if (no_neg && float64_is_neg(ops[i].f64)) {
    212                 ops[i].f64 = float64_chs(ops[i].f64);
    213             }
    214             break;
    215         case PREC_QUAD:
    216         case PREC_FLOAT128:
    217             ops[i].f128 = random_quad_ops[i];
    218             if (no_neg && float128_is_neg(ops[i].f128)) {
    219                 ops[i].f128 = float128_chs(ops[i].f128);
    220             }
    221             break;
    222         default:
    223             g_assert_not_reached();
    224         }
    225     }
    226 }
    227 
    228 /*
    229  * The main benchmark function. Instead of (ab)using macros, we rely
    230  * on the compiler to unfold this at compile-time.
    231  */
    232 static void bench(enum precision prec, enum op op, int n_ops, bool no_neg)
    233 {
    234     int64_t tf = get_clock() + duration * 1000000000LL;
    235 
    236     while (get_clock() < tf) {
    237         union fp ops[MAX_OPERANDS];
    238         int64_t t0;
    239         int i;
    240 
    241         update_random_ops(n_ops, prec);
    242         switch (prec) {
    243         case PREC_SINGLE:
    244             fill_random(ops, n_ops, prec, no_neg);
    245             t0 = get_clock();
    246             for (i = 0; i < OPS_PER_ITER; i++) {
    247                 float a = ops[0].f;
    248                 float b = ops[1].f;
    249                 float c = ops[2].f;
    250 
    251                 switch (op) {
    252                 case OP_ADD:
    253                     res.f = a + b;
    254                     break;
    255                 case OP_SUB:
    256                     res.f = a - b;
    257                     break;
    258                 case OP_MUL:
    259                     res.f = a * b;
    260                     break;
    261                 case OP_DIV:
    262                     res.f = a / b;
    263                     break;
    264                 case OP_FMA:
    265                     res.f = fmaf(a, b, c);
    266                     break;
    267                 case OP_SQRT:
    268                     res.f = sqrtf(a);
    269                     break;
    270                 case OP_CMP:
    271                     res.u64 = isgreater(a, b);
    272                     break;
    273                 default:
    274                     g_assert_not_reached();
    275                 }
    276             }
    277             break;
    278         case PREC_DOUBLE:
    279             fill_random(ops, n_ops, prec, no_neg);
    280             t0 = get_clock();
    281             for (i = 0; i < OPS_PER_ITER; i++) {
    282                 double a = ops[0].d;
    283                 double b = ops[1].d;
    284                 double c = ops[2].d;
    285 
    286                 switch (op) {
    287                 case OP_ADD:
    288                     res.d = a + b;
    289                     break;
    290                 case OP_SUB:
    291                     res.d = a - b;
    292                     break;
    293                 case OP_MUL:
    294                     res.d = a * b;
    295                     break;
    296                 case OP_DIV:
    297                     res.d = a / b;
    298                     break;
    299                 case OP_FMA:
    300                     res.d = fma(a, b, c);
    301                     break;
    302                 case OP_SQRT:
    303                     res.d = sqrt(a);
    304                     break;
    305                 case OP_CMP:
    306                     res.u64 = isgreater(a, b);
    307                     break;
    308                 default:
    309                     g_assert_not_reached();
    310                 }
    311             }
    312             break;
    313         case PREC_FLOAT32:
    314             fill_random(ops, n_ops, prec, no_neg);
    315             t0 = get_clock();
    316             for (i = 0; i < OPS_PER_ITER; i++) {
    317                 float32 a = ops[0].f32;
    318                 float32 b = ops[1].f32;
    319                 float32 c = ops[2].f32;
    320 
    321                 switch (op) {
    322                 case OP_ADD:
    323                     res.f32 = float32_add(a, b, &soft_status);
    324                     break;
    325                 case OP_SUB:
    326                     res.f32 = float32_sub(a, b, &soft_status);
    327                     break;
    328                 case OP_MUL:
    329                     res.f = float32_mul(a, b, &soft_status);
    330                     break;
    331                 case OP_DIV:
    332                     res.f32 = float32_div(a, b, &soft_status);
    333                     break;
    334                 case OP_FMA:
    335                     res.f32 = float32_muladd(a, b, c, 0, &soft_status);
    336                     break;
    337                 case OP_SQRT:
    338                     res.f32 = float32_sqrt(a, &soft_status);
    339                     break;
    340                 case OP_CMP:
    341                     res.u64 = float32_compare_quiet(a, b, &soft_status);
    342                     break;
    343                 default:
    344                     g_assert_not_reached();
    345                 }
    346             }
    347             break;
    348         case PREC_FLOAT64:
    349             fill_random(ops, n_ops, prec, no_neg);
    350             t0 = get_clock();
    351             for (i = 0; i < OPS_PER_ITER; i++) {
    352                 float64 a = ops[0].f64;
    353                 float64 b = ops[1].f64;
    354                 float64 c = ops[2].f64;
    355 
    356                 switch (op) {
    357                 case OP_ADD:
    358                     res.f64 = float64_add(a, b, &soft_status);
    359                     break;
    360                 case OP_SUB:
    361                     res.f64 = float64_sub(a, b, &soft_status);
    362                     break;
    363                 case OP_MUL:
    364                     res.f = float64_mul(a, b, &soft_status);
    365                     break;
    366                 case OP_DIV:
    367                     res.f64 = float64_div(a, b, &soft_status);
    368                     break;
    369                 case OP_FMA:
    370                     res.f64 = float64_muladd(a, b, c, 0, &soft_status);
    371                     break;
    372                 case OP_SQRT:
    373                     res.f64 = float64_sqrt(a, &soft_status);
    374                     break;
    375                 case OP_CMP:
    376                     res.u64 = float64_compare_quiet(a, b, &soft_status);
    377                     break;
    378                 default:
    379                     g_assert_not_reached();
    380                 }
    381             }
    382             break;
    383         case PREC_FLOAT128:
    384             fill_random(ops, n_ops, prec, no_neg);
    385             t0 = get_clock();
    386             for (i = 0; i < OPS_PER_ITER; i++) {
    387                 float128 a = ops[0].f128;
    388                 float128 b = ops[1].f128;
    389                 float128 c = ops[2].f128;
    390 
    391                 switch (op) {
    392                 case OP_ADD:
    393                     res.f128 = float128_add(a, b, &soft_status);
    394                     break;
    395                 case OP_SUB:
    396                     res.f128 = float128_sub(a, b, &soft_status);
    397                     break;
    398                 case OP_MUL:
    399                     res.f128 = float128_mul(a, b, &soft_status);
    400                     break;
    401                 case OP_DIV:
    402                     res.f128 = float128_div(a, b, &soft_status);
    403                     break;
    404                 case OP_FMA:
    405                     res.f128 = float128_muladd(a, b, c, 0, &soft_status);
    406                     break;
    407                 case OP_SQRT:
    408                     res.f128 = float128_sqrt(a, &soft_status);
    409                     break;
    410                 case OP_CMP:
    411                     res.u64 = float128_compare_quiet(a, b, &soft_status);
    412                     break;
    413                 default:
    414                     g_assert_not_reached();
    415                 }
    416             }
    417             break;
    418         default:
    419             g_assert_not_reached();
    420         }
    421         ns_elapsed += get_clock() - t0;
    422         n_completed_ops += OPS_PER_ITER;
    423     }
    424 }
    425 
    426 #define GEN_BENCH(name, type, prec, op, n_ops)          \
    427     static void __attribute__((flatten)) name(void)     \
    428     {                                                   \
    429         bench(prec, op, n_ops, false);                  \
    430     }
    431 
    432 #define GEN_BENCH_NO_NEG(name, type, prec, op, n_ops)   \
    433     static void __attribute__((flatten)) name(void)     \
    434     {                                                   \
    435         bench(prec, op, n_ops, true);                   \
    436     }
    437 
    438 #define GEN_BENCH_ALL_TYPES(opname, op, n_ops)                          \
    439     GEN_BENCH(bench_ ## opname ## _float, float, PREC_SINGLE, op, n_ops) \
    440     GEN_BENCH(bench_ ## opname ## _double, double, PREC_DOUBLE, op, n_ops) \
    441     GEN_BENCH(bench_ ## opname ## _float32, float32, PREC_FLOAT32, op, n_ops) \
    442     GEN_BENCH(bench_ ## opname ## _float64, float64, PREC_FLOAT64, op, n_ops) \
    443     GEN_BENCH(bench_ ## opname ## _float128, float128, PREC_FLOAT128, op, n_ops)
    444 
    445 GEN_BENCH_ALL_TYPES(add, OP_ADD, 2)
    446 GEN_BENCH_ALL_TYPES(sub, OP_SUB, 2)
    447 GEN_BENCH_ALL_TYPES(mul, OP_MUL, 2)
    448 GEN_BENCH_ALL_TYPES(div, OP_DIV, 2)
    449 GEN_BENCH_ALL_TYPES(fma, OP_FMA, 3)
    450 GEN_BENCH_ALL_TYPES(cmp, OP_CMP, 2)
    451 #undef GEN_BENCH_ALL_TYPES
    452 
    453 #define GEN_BENCH_ALL_TYPES_NO_NEG(name, op, n)                         \
    454     GEN_BENCH_NO_NEG(bench_ ## name ## _float, float, PREC_SINGLE, op, n) \
    455     GEN_BENCH_NO_NEG(bench_ ## name ## _double, double, PREC_DOUBLE, op, n) \
    456     GEN_BENCH_NO_NEG(bench_ ## name ## _float32, float32, PREC_FLOAT32, op, n) \
    457     GEN_BENCH_NO_NEG(bench_ ## name ## _float64, float64, PREC_FLOAT64, op, n) \
    458     GEN_BENCH_NO_NEG(bench_ ## name ## _float128, float128, PREC_FLOAT128, op, n)
    459 
    460 GEN_BENCH_ALL_TYPES_NO_NEG(sqrt, OP_SQRT, 1)
    461 #undef GEN_BENCH_ALL_TYPES_NO_NEG
    462 
    463 #undef GEN_BENCH_NO_NEG
    464 #undef GEN_BENCH
    465 
    466 #define GEN_BENCH_FUNCS(opname, op)                             \
    467     [op] = {                                                    \
    468         [PREC_SINGLE]    = bench_ ## opname ## _float,          \
    469         [PREC_DOUBLE]    = bench_ ## opname ## _double,         \
    470         [PREC_FLOAT32]   = bench_ ## opname ## _float32,        \
    471         [PREC_FLOAT64]   = bench_ ## opname ## _float64,        \
    472         [PREC_FLOAT128]   = bench_ ## opname ## _float128,      \
    473     }
    474 
    475 static const bench_func_t bench_funcs[OP_MAX_NR][PREC_MAX_NR] = {
    476     GEN_BENCH_FUNCS(add, OP_ADD),
    477     GEN_BENCH_FUNCS(sub, OP_SUB),
    478     GEN_BENCH_FUNCS(mul, OP_MUL),
    479     GEN_BENCH_FUNCS(div, OP_DIV),
    480     GEN_BENCH_FUNCS(fma, OP_FMA),
    481     GEN_BENCH_FUNCS(sqrt, OP_SQRT),
    482     GEN_BENCH_FUNCS(cmp, OP_CMP),
    483 };
    484 
    485 #undef GEN_BENCH_FUNCS
    486 
    487 static void run_bench(void)
    488 {
    489     bench_func_t f;
    490 
    491     f = bench_funcs[operation][precision];
    492     g_assert(f);
    493     f();
    494 }
    495 
    /*
     * Look up @name in @arr, which must be NULL-terminated.
     *
     * Returns the index of the first exact match, or -1 if there is none.
     */
    static int find_name(const char * const *arr, const char *name)
    {
        const char * const *entry;

        for (entry = arr; *entry != NULL; entry++) {
            if (!strcmp(name, *entry)) {
                return (int)(entry - arr);
            }
        }
        return -1;
    }
    508 
    509 static void usage_complete(int argc, char *argv[])
    510 {
    511     gchar *op_list = g_strjoinv(", ", (gchar **)op_names);
    512     gchar *tester_list = g_strjoinv(", ", (gchar **)tester_names);
    513 
    514     fprintf(stderr, "Usage: %s [options]\n", argv[0]);
    515     fprintf(stderr, "options:\n");
    516     fprintf(stderr, " -d = duration, in seconds. Default: %d\n",
    517             DEFAULT_DURATION_SECS);
    518     fprintf(stderr, " -h = show this help message.\n");
    519     fprintf(stderr, " -o = floating point operation (%s). Default: %s\n",
    520             op_list, op_names[0]);
    521     fprintf(stderr, " -p = floating point precision (single, double, quad[soft only]). "
    522             "Default: single\n");
    523     fprintf(stderr, " -r = rounding mode (even, zero, down, up, tieaway). "
    524             "Default: even\n");
    525     fprintf(stderr, " -t = tester (%s). Default: %s\n",
    526             tester_list, tester_names[0]);
    527     fprintf(stderr, " -z = flush inputs to zero (soft tester only). "
    528             "Default: disabled\n");
    529     fprintf(stderr, " -Z = flush output to zero (soft tester only). "
    530             "Default: disabled\n");
    531 
    532     g_free(tester_list);
    533     g_free(op_list);
    534 }
    535 
    536 static int round_name_to_mode(const char *name)
    537 {
    538     int i;
    539 
    540     for (i = 0; i < N_ROUND_MODES; i++) {
    541         if (!strcmp(round_names[i], name)) {
    542             return i;
    543         }
    544     }
    545     return -1;
    546 }
    547 
    548 static G_NORETURN
    549 void die_host_rounding(enum rounding rounding)
    550 {
    551     fprintf(stderr, "fatal: '%s' rounding not supported on this host\n",
    552             round_names[rounding]);
    553     exit(EXIT_FAILURE);
    554 }
    555 
    556 static void set_host_precision(enum rounding rounding)
    557 {
    558     int rhost;
    559 
    560     switch (rounding) {
    561     case ROUND_EVEN:
    562         rhost = FE_TONEAREST;
    563         break;
    564     case ROUND_ZERO:
    565         rhost = FE_TOWARDZERO;
    566         break;
    567     case ROUND_DOWN:
    568         rhost = FE_DOWNWARD;
    569         break;
    570     case ROUND_UP:
    571         rhost = FE_UPWARD;
    572         break;
    573     case ROUND_TIEAWAY:
    574         die_host_rounding(rounding);
    575         return;
    576     default:
    577         g_assert_not_reached();
    578     }
    579 
    580     if (fesetround(rhost)) {
    581         die_host_rounding(rounding);
    582     }
    583 }
    584 
    585 static void set_soft_precision(enum rounding rounding)
    586 {
    587     signed char mode;
    588 
    589     switch (rounding) {
    590     case ROUND_EVEN:
    591         mode = float_round_nearest_even;
    592         break;
    593     case ROUND_ZERO:
    594         mode = float_round_to_zero;
    595         break;
    596     case ROUND_DOWN:
    597         mode = float_round_down;
    598         break;
    599     case ROUND_UP:
    600         mode = float_round_up;
    601         break;
    602     case ROUND_TIEAWAY:
    603         mode = float_round_ties_away;
    604         break;
    605     default:
    606         g_assert_not_reached();
    607     }
    608     soft_status.float_rounding_mode = mode;
    609 }
    610 
    611 static void parse_args(int argc, char *argv[])
    612 {
    613     int c;
    614     int val;
    615     int rounding = ROUND_EVEN;
    616 
    617     for (;;) {
    618         c = getopt(argc, argv, "d:ho:p:r:t:zZ");
    619         if (c < 0) {
    620             break;
    621         }
    622         switch (c) {
    623         case 'd':
    624             duration = atoi(optarg);
    625             break;
    626         case 'h':
    627             usage_complete(argc, argv);
    628             exit(EXIT_SUCCESS);
    629         case 'o':
    630             val = find_name(op_names, optarg);
    631             if (val < 0) {
    632                 fprintf(stderr, "Unsupported op '%s'\n", optarg);
    633                 exit(EXIT_FAILURE);
    634             }
    635             operation = val;
    636             break;
    637         case 'p':
    638             if (!strcmp(optarg, "single")) {
    639                 precision = PREC_SINGLE;
    640             } else if (!strcmp(optarg, "double")) {
    641                 precision = PREC_DOUBLE;
    642             } else if (!strcmp(optarg, "quad")) {
    643                 precision = PREC_QUAD;
    644             } else {
    645                 fprintf(stderr, "Unsupported precision '%s'\n", optarg);
    646                 exit(EXIT_FAILURE);
    647             }
    648             break;
    649         case 'r':
    650             rounding = round_name_to_mode(optarg);
    651             if (rounding < 0) {
    652                 fprintf(stderr, "fatal: invalid rounding mode '%s'\n", optarg);
    653                 exit(EXIT_FAILURE);
    654             }
    655             break;
    656         case 't':
    657             val = find_name(tester_names, optarg);
    658             if (val < 0) {
    659                 fprintf(stderr, "Unsupported tester '%s'\n", optarg);
    660                 exit(EXIT_FAILURE);
    661             }
    662             tester = val;
    663             break;
    664         case 'z':
    665             soft_status.flush_inputs_to_zero = 1;
    666             break;
    667         case 'Z':
    668             soft_status.flush_to_zero = 1;
    669             break;
    670         }
    671     }
    672 
    673     /* set precision and rounding mode based on the tester */
    674     switch (tester) {
    675     case TESTER_HOST:
    676         set_host_precision(rounding);
    677         break;
    678     case TESTER_SOFT:
    679         set_soft_precision(rounding);
    680         switch (precision) {
    681         case PREC_SINGLE:
    682             precision = PREC_FLOAT32;
    683             break;
    684         case PREC_DOUBLE:
    685             precision = PREC_FLOAT64;
    686             break;
    687         case PREC_QUAD:
    688             precision = PREC_FLOAT128;
    689             break;
    690         default:
    691             g_assert_not_reached();
    692         }
    693         break;
    694     default:
    695         g_assert_not_reached();
    696     }
    697 }
    698 
    699 static void pr_stats(void)
    700 {
    701     printf("%.2f MFlops\n", (double)n_completed_ops / ns_elapsed * 1e3);
    702 }
    703 
    /* Entry point: configure from the command line, measure, then report. */
    int main(int argc, char *argv[])
    {
        parse_args(argc, argv);

        run_bench();
        pr_stats();

        return EXIT_SUCCESS;
    }