libcxx

libcxx mirror with random patches
git clone https://git.neptards.moe/neptards/libcxx.git

report.py (19076B)


"""report.py - Utilities for reporting statistics about benchmark results
"""
import unittest
import os
import re
import copy

from scipy.stats import mannwhitneyu


class BenchmarkColor(object):
    def __init__(self, name, code):
        self.name = name
        self.code = code

    def __repr__(self):
        return '%s%r' % (self.__class__.__name__,
                         (self.name, self.code))

    def __format__(self, format):
        return self.code


# Benchmark Colors Enumeration
BC_NONE = BenchmarkColor('NONE', '')
BC_MAGENTA = BenchmarkColor('MAGENTA', '\033[95m')
BC_CYAN = BenchmarkColor('CYAN', '\033[96m')
BC_OKBLUE = BenchmarkColor('OKBLUE', '\033[94m')
BC_OKGREEN = BenchmarkColor('OKGREEN', '\033[32m')
BC_HEADER = BenchmarkColor('HEADER', '\033[92m')
BC_WARNING = BenchmarkColor('WARNING', '\033[93m')
BC_WHITE = BenchmarkColor('WHITE', '\033[97m')
BC_FAIL = BenchmarkColor('FAIL', '\033[91m')
BC_ENDC = BenchmarkColor('ENDC', '\033[0m')
BC_BOLD = BenchmarkColor('BOLD', '\033[1m')
BC_UNDERLINE = BenchmarkColor('UNDERLINE', '\033[4m')

UTEST_MIN_REPETITIONS = 2
UTEST_OPTIMAL_REPETITIONS = 9  # Lowest reasonable number; more is better.
UTEST_COL_NAME = "_pvalue"

def color_format(use_color, fmt_str, *args, **kwargs):
    """
    Return the result of 'fmt_str.format(*args, **kwargs)' after transforming
    'args' and 'kwargs' according to the value of 'use_color'. If 'use_color'
    is False then all color codes in 'args' and 'kwargs' are replaced with
    the empty string.
    """
    assert use_color is True or use_color is False
    if not use_color:
        args = [arg if not isinstance(arg, BenchmarkColor) else BC_NONE
                for arg in args]
        kwargs = {key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE
                  for key, arg in kwargs.items()}
    return fmt_str.format(*args, **kwargs)
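# Illustrative usage sketch: BenchmarkColor substitutes its ANSI code via
# __format__, so
#     color_format(True, "{}ok{endc}", BC_OKGREEN, endc=BC_ENDC)
# returns "\033[32mok\033[0m", while the same call with use_color=False
# returns plain "ok".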

def find_longest_name(benchmark_list):
    """
    Return the length of the longest benchmark name in a given list of
    benchmark JSON objects
    """
    longest_name = 1
    for bc in benchmark_list:
        if len(bc['name']) > longest_name:
            longest_name = len(bc['name'])
    return longest_name

def calculate_change(old_val, new_val):
    """
    Return a float representing the decimal change between old_val and new_val.
    """
    if old_val == 0 and new_val == 0:
        return 0.0
    if old_val == 0:
        return float(new_val - old_val) / (float(old_val + new_val) / 2)
    return float(new_val - old_val) / abs(old_val)
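# Worked examples for calculate_change:
#     calculate_change(100, 110) ==  0.10  (printed as +0.1000)
#     calculate_change(100,  90) == -0.10  (printed as -0.1000)
# When old_val == 0, the symmetric form (new - old) / ((old + new) / 2)
# avoids division by zero, e.g. calculate_change(0, 10) == 2.0.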

def filter_benchmark(json_orig, family, replacement=""):
    """
    Apply a regex filter to the json, keeping only the 'family' of benchmarks
    whose names match it; the matched part of each surviving name is replaced
    with 'replacement'.
    """
    regex = re.compile(family)
    filtered = {}
    filtered['benchmarks'] = []
    for be in json_orig['benchmarks']:
        if not regex.search(be['name']):
            continue
        filteredbench = copy.deepcopy(be)  # Do NOT modify the old name!
        filteredbench['name'] = regex.sub(replacement, filteredbench['name'])
        filtered['benchmarks'].append(filteredbench)
    return filtered
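# Illustrative example (hypothetical benchmark names): given benchmarks
# "BM_Zero" and "BM_Zero/4", filter_benchmark(json, "BM_Z.ro", ".") keeps
# both and renames them to "." and "./4", so two such families can then be
# diffed against each other with generate_difference_report (see the
# family test below).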

def get_unique_benchmark_names(json):
    """
    Return all of the unique benchmark 'names', preserving the order of
    their first appearance.
    """
    seen = set()
    # seen.add() returns None, so the 'or True' keeps the condition truthy
    # while recording the name as a side effect.
    uniqued = [x['name'] for x in json['benchmarks']
               if x['name'] not in seen and
               (seen.add(x['name']) or True)]
    return uniqued
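# Example: for entries named ['BM_One', 'BM_Two', 'BM_One'] this returns
# ['BM_One', 'BM_Two'].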

def intersect(list1, list2):
    """
    Given two lists, get a new list consisting of the elements only contained
    in *both of the input lists*, while preserving the ordering.
    """
    return [x for x in list1 if x in list2]

def partition_benchmarks(json1, json2):
    """
    While preserving the ordering, find benchmarks with the same names in
    both of the inputs, and group them.
    (i.e. partition/filter into groups with common name)
    """
    json1_unique_names = get_unique_benchmark_names(json1)
    json2_unique_names = get_unique_benchmark_names(json2)
    names = intersect(json1_unique_names, json2_unique_names)
    partitions = []
    for name in names:
        # Pick the time unit from the first entry of the lhs benchmark.
        # (next() is the Python 3 spelling; the previous generator.next()
        # call was a Python 2 leftover that raises AttributeError here.)
        time_unit = next(x['time_unit']
                         for x in json1['benchmarks'] if x['name'] == name)
        # Filter by name and time unit.
        lhs = [x for x in json1['benchmarks'] if x['name'] == name and
               x['time_unit'] == time_unit]
        rhs = [x for x in json2['benchmarks'] if x['name'] == name and
               x['time_unit'] == time_unit]
        partitions.append([lhs, rhs])
    return partitions
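# Shape note: each partition is a pair [lhs, rhs] of lists of benchmark
# dicts that share one name and time unit (e.g. repeated runs of 'BM_Two'
# from json1 on the left and json2 on the right); the two lists may have
# different lengths when the repetition counts differ.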

def extract_field(partition, field_name):
    # The count of elements may be different. We want *all* of them.
    lhs = [x[field_name] for x in partition[0]]
    rhs = [x[field_name] for x in partition[1]]
    return [lhs, rhs]

def print_utest(partition, utest_alpha, first_col_width, use_color=True):
    timings_time = extract_field(partition, 'real_time')
    timings_cpu = extract_field(partition, 'cpu_time')

    min_rep_cnt = min(len(timings_time[0]),
                      len(timings_time[1]),
                      len(timings_cpu[0]),
                      len(timings_cpu[1]))

    # Does *everything* have at least UTEST_MIN_REPETITIONS repetitions?
    if min_rep_cnt < UTEST_MIN_REPETITIONS:
        return []

    def get_utest_color(pval):
        return BC_FAIL if pval >= utest_alpha else BC_OKGREEN

    time_pvalue = mannwhitneyu(
        timings_time[0], timings_time[1], alternative='two-sided').pvalue
    cpu_pvalue = mannwhitneyu(
        timings_cpu[0], timings_cpu[1], alternative='two-sided').pvalue

    dsc = "U Test, Repetitions: {} vs {}".format(
        len(timings_cpu[0]), len(timings_cpu[1]))
    dsc_color = BC_OKGREEN

    if min_rep_cnt < UTEST_OPTIMAL_REPETITIONS:
        dsc_color = BC_WARNING
        dsc += ". WARNING: Results unreliable! {}+ repetitions recommended.".format(
            UTEST_OPTIMAL_REPETITIONS)

    special_str = "{}{:<{}s}{endc}{}{:16.4f}{endc}{}{:16.4f}{endc}{}      {}"

    last_name = partition[0][0]['name']
    return [color_format(use_color,
                         special_str,
                         BC_HEADER,
                         "{}{}".format(last_name, UTEST_COL_NAME),
                         first_col_width,
                         get_utest_color(time_pvalue), time_pvalue,
                         get_utest_color(cpu_pvalue), cpu_pvalue,
                         dsc_color, dsc,
                         endc=BC_ENDC)]
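# Output sketch (uncolored; spacing approximate, cf. the U test cases in
# the unit tests below):
#     BM_Two_pvalue          0.6985          0.6985      U Test, Repetitions: 2 vs 2. ...
# The two numbers are the Mann-Whitney two-sided p-values for real_time
# and cpu_time, respectively.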

def generate_difference_report(
        json1,
        json2,
        display_aggregates_only=False,
        utest=False,
        utest_alpha=0.05,
        use_color=True):
    """
    Calculate and report the difference between each test of two benchmark
    runs specified as 'json1' and 'json2'.
    """
    assert utest is True or utest is False
    first_col_width = find_longest_name(json1['benchmarks'])

    # NOTE: defined but not currently called anywhere in this file.
    def find_test(name):
        for b in json2['benchmarks']:
            if b['name'] == name:
                return b
        return None

    first_col_width = max(
        first_col_width,
        len('Benchmark'))
    first_col_width += len(UTEST_COL_NAME)
    first_line = "{:<{}s}Time             CPU      Time Old      Time New       CPU Old       CPU New".format(
        'Benchmark', 12 + first_col_width)
    output_strs = [first_line, '-' * len(first_line)]

    partitions = partition_benchmarks(json1, json2)
    for partition in partitions:
        # Careful: the two sides may have different repetition counts.
        for i in range(min(len(partition[0]), len(partition[1]))):
            bn = partition[0][i]
            other_bench = partition[1][i]

            # If we were asked to display only aggregates, skip any
            # non-aggregate entries.
            if display_aggregates_only and 'run_type' in bn and 'run_type' in other_bench:
                assert bn['run_type'] == other_bench['run_type']
                if bn['run_type'] != 'aggregate':
                    continue

            fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}"

            def get_color(res):
                # More than 5% slower is flagged red; between 7% faster and
                # 5% slower is neutral; more than 7% faster is cyan.
                if res > 0.05:
                    return BC_FAIL
                elif res > -0.07:
                    return BC_WHITE
                else:
                    return BC_CYAN

            tres = calculate_change(bn['real_time'], other_bench['real_time'])
            cpures = calculate_change(bn['cpu_time'], other_bench['cpu_time'])
            output_strs += [color_format(use_color,
                                         fmt_str,
                                         BC_HEADER,
                                         bn['name'],
                                         first_col_width,
                                         get_color(tres),
                                         tres,
                                         get_color(cpures),
                                         cpures,
                                         bn['real_time'],
                                         other_bench['real_time'],
                                         bn['cpu_time'],
                                         other_bench['cpu_time'],
                                         endc=BC_ENDC)]

        # After processing the whole partition, if requested, do the U test.
        if utest:
            output_strs += print_utest(partition,
                                       utest_alpha=utest_alpha,
                                       first_col_width=first_col_width,
                                       use_color=use_color)

    return output_strs
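# Minimal end-to-end sketch (hypothetical file names), mirroring how the
# unit tests below drive this function:
#     import json
#     with open('run_old.json') as f:
#         json1 = json.load(f)
#     with open('run_new.json') as f:
#         json2 = json.load(f)
#     print("\n".join(generate_difference_report(
#         json1, json2, utest=True, utest_alpha=0.05, use_color=False)))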

###############################################################################
# Unit tests

class TestGetUniqueBenchmarkNames(unittest.TestCase):
    def load_results(self):
        import json
        testInputs = os.path.join(
            os.path.dirname(
                os.path.realpath(__file__)),
            'Inputs')
        testOutput = os.path.join(testInputs, 'test3_run0.json')
        with open(testOutput, 'r') as f:
            json = json.load(f)
        return json

    def test_basic(self):
        expect_lines = [
            'BM_One',
            'BM_Two',
            'short',  # These two are not sorted
            'medium',  # These two are not sorted
        ]
        json = self.load_results()
        output_lines = get_unique_benchmark_names(json)
        print("\n")
        print("\n".join(output_lines))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            self.assertEqual(expect_lines[i], output_lines[i])

class TestReportDifference(unittest.TestCase):
    def load_results(self):
        import json
        testInputs = os.path.join(
            os.path.dirname(
                os.path.realpath(__file__)),
            'Inputs')
        testOutput1 = os.path.join(testInputs, 'test1_run1.json')
        testOutput2 = os.path.join(testInputs, 'test1_run2.json')
        with open(testOutput1, 'r') as f:
            json1 = json.load(f)
        with open(testOutput2, 'r') as f:
            json2 = json.load(f)
        return json1, json2

    def test_basic(self):
        expect_lines = [
            ['BM_SameTimes', '+0.0000', '+0.0000', '10', '10', '10', '10'],
            ['BM_2xFaster', '-0.5000', '-0.5000', '50', '25', '50', '25'],
            ['BM_2xSlower', '+1.0000', '+1.0000', '50', '100', '50', '100'],
            ['BM_1PercentFaster', '-0.0100', '-0.0100', '100', '99', '100', '99'],
            ['BM_1PercentSlower', '+0.0100', '+0.0100', '100', '101', '100', '101'],
            ['BM_10PercentFaster', '-0.1000', '-0.1000', '100', '90', '100', '90'],
            ['BM_10PercentSlower', '+0.1000', '+0.1000', '100', '110', '100', '110'],
            ['BM_100xSlower', '+99.0000', '+99.0000',
                '100', '10000', '100', '10000'],
            ['BM_100xFaster', '-0.9900', '-0.9900',
                '10000', '100', '10000', '100'],
            ['BM_10PercentCPUToTime', '+0.1000',
                '-0.1000', '100', '110', '100', '90'],
            ['BM_ThirdFaster', '-0.3333', '-0.3334', '100', '67', '100', '67'],
            ['BM_BadTimeUnit', '-0.9000', '+0.2000', '0', '0', '0', '1'],
        ]
        json1, json2 = self.load_results()
        output_lines_with_header = generate_difference_report(
            json1, json2, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)

class TestReportDifferenceBetweenFamilies(unittest.TestCase):
    def load_result(self):
        import json
        testInputs = os.path.join(
            os.path.dirname(
                os.path.realpath(__file__)),
            'Inputs')
        testOutput = os.path.join(testInputs, 'test2_run.json')
        with open(testOutput, 'r') as f:
            json = json.load(f)
        return json

    def test_basic(self):
        expect_lines = [
            ['.', '-0.5000', '-0.5000', '10', '5', '10', '5'],
            ['./4', '-0.5000', '-0.5000', '40', '20', '40', '20'],
            ['Prefix/.', '-0.5000', '-0.5000', '20', '10', '20', '10'],
            ['Prefix/./3', '-0.5000', '-0.5000', '30', '15', '30', '15'],
        ]
        json = self.load_result()
        json1 = filter_benchmark(json, "BM_Z.ro", ".")
        json2 = filter_benchmark(json, "BM_O.e", ".")
        output_lines_with_header = generate_difference_report(
            json1, json2, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)

class TestReportDifferenceWithUTest(unittest.TestCase):
    def load_results(self):
        import json
        testInputs = os.path.join(
            os.path.dirname(
                os.path.realpath(__file__)),
            'Inputs')
        testOutput1 = os.path.join(testInputs, 'test3_run0.json')
        testOutput2 = os.path.join(testInputs, 'test3_run1.json')
        with open(testOutput1, 'r') as f:
            json1 = json.load(f)
        with open(testOutput2, 'r') as f:
            json2 = json.load(f)
        return json1, json2

    def test_utest(self):
        expect_lines = [
            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
            ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'],
            ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'],
            ['BM_Two_pvalue',
             '0.6985',
             '0.6985',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '2.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
            ['short_pvalue',
             '0.7671',
             '0.1489',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '3.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'],
        ]
        json1, json2 = self.load_results()
        output_lines_with_header = generate_difference_report(
            json1, json2, utest=True, utest_alpha=0.05, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(expect_lines[i], parts)

class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly(
        unittest.TestCase):
    def load_results(self):
        import json
        testInputs = os.path.join(
            os.path.dirname(
                os.path.realpath(__file__)),
            'Inputs')
        testOutput1 = os.path.join(testInputs, 'test3_run0.json')
        testOutput2 = os.path.join(testInputs, 'test3_run1.json')
        with open(testOutput1, 'r') as f:
            json1 = json.load(f)
        with open(testOutput2, 'r') as f:
            json2 = json.load(f)
        return json1, json2

    def test_utest(self):
        expect_lines = [
            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
            ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'],
            ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'],
            ['BM_Two_pvalue',
             '0.6985',
             '0.6985',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '2.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
            ['short_pvalue',
             '0.7671',
             '0.1489',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '3.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
        ]
        json1, json2 = self.load_results()
        output_lines_with_header = generate_difference_report(
            json1, json2, display_aggregates_only=True,
            utest=True, utest_alpha=0.05, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(expect_lines[i], parts)


if __name__ == '__main__':
    unittest.main()

# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
# kate: indent-mode python; remove-trailing-spaces modified;