#!/usr/bin/env python
"""
compare.py - versatile benchmark output compare tool
"""

import argparse
from argparse import ArgumentParser
import os
import sys

import gbench
from gbench import util, report
from gbench.util import *
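
# Illustrative invocations of the three modes defined in create_parser()
# below (the paths and filter regexes here are hypothetical):
#   compare.py benchmarks ./baseline_bench ./contender_bench
#   compare.py filters ./a_bench BM_Old BM_New
#   compare.py benchmarksfiltered ./baseline_bench BM_Foo ./contender_bench BM_Foo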


def check_inputs(in1, in2, flags):
    """
    Perform checking on the user-provided inputs and diagnose any abnormalities
    """
    in1_kind, in1_err = classify_input_file(in1)
    in2_kind, in2_err = classify_input_file(in2)
    output_file = find_benchmark_flag('--benchmark_out=', flags)
    output_type = find_benchmark_flag('--benchmark_out_format=', flags)
    if in1_kind == IT_Executable and in2_kind == IT_Executable and output_file:
        print(("WARNING: '--benchmark_out=%s' will be passed to both "
               "benchmarks, causing it to be overwritten") % output_file)
    if in1_kind == IT_JSON and in2_kind == IT_JSON and len(flags) > 0:
        print("WARNING: passing optional flags has no effect since both "
              "inputs are JSON")
    if output_type is not None and output_type != 'json':
        print(("ERROR: passing '--benchmark_out_format=%s' to 'compare.py'"
               " is not supported.") % output_type)
        sys.exit(1)
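
# For instance, a hypothetical invocation such as
#   compare.py benchmarks ./a_bench ./b_bench --benchmark_out_format=csv
# aborts with the ERROR above, since the comparison itself only understands
# JSON output.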


def create_parser():
    parser = ArgumentParser(
        description='versatile benchmark output compare tool')

    subparsers = parser.add_subparsers(
        help='This tool has multiple modes of operation:',
        dest='mode')

    parser_a = subparsers.add_parser(
        'benchmarks',
        help='The simplest use case: compare all the output of two benchmarks')
    baseline = parser_a.add_argument_group(
        'baseline', 'The benchmark baseline')
    baseline.add_argument(
        'test_baseline',
        metavar='test_baseline',
        type=argparse.FileType('r'),
        nargs=1,
        help='A benchmark executable or JSON output file')
    contender = parser_a.add_argument_group(
        'contender', 'The benchmark that will be compared against the baseline')
    contender.add_argument(
        'test_contender',
        metavar='test_contender',
        type=argparse.FileType('r'),
        nargs=1,
        help='A benchmark executable or JSON output file')
    parser_a.add_argument(
        'benchmark_options',
        metavar='benchmark_options',
        nargs=argparse.REMAINDER,
        help='Arguments to pass when running benchmark executables')

    parser_b = subparsers.add_parser(
        'filters',
        help='Compare two different filters of one benchmark')
    baseline = parser_b.add_argument_group(
        'baseline', 'The benchmark baseline')
    baseline.add_argument(
        'test',
        metavar='test',
        type=argparse.FileType('r'),
        nargs=1,
        help='A benchmark executable or JSON output file')
    baseline.add_argument(
        'filter_baseline',
        metavar='filter_baseline',
        type=str,
        nargs=1,
        help='The first filter, which will be used as the baseline')
    contender = parser_b.add_argument_group(
        'contender', 'The benchmark that will be compared against the baseline')
    contender.add_argument(
        'filter_contender',
        metavar='filter_contender',
        type=str,
        nargs=1,
        help='The second filter, which will be compared against the baseline')
    parser_b.add_argument(
        'benchmark_options',
        metavar='benchmark_options',
        nargs=argparse.REMAINDER,
        help='Arguments to pass when running benchmark executables')

    parser_c = subparsers.add_parser(
        'benchmarksfiltered',
        help='Compare one filter of the first benchmark with another filter '
             'of the second benchmark')
    baseline = parser_c.add_argument_group(
        'baseline', 'The benchmark baseline')
    baseline.add_argument(
        'test_baseline',
        metavar='test_baseline',
        type=argparse.FileType('r'),
        nargs=1,
        help='A benchmark executable or JSON output file')
    baseline.add_argument(
        'filter_baseline',
        metavar='filter_baseline',
        type=str,
        nargs=1,
        help='The first filter, which will be used as the baseline')
    contender = parser_c.add_argument_group(
        'contender', 'The benchmark that will be compared against the baseline')
    contender.add_argument(
        'test_contender',
        metavar='test_contender',
        type=argparse.FileType('r'),
        nargs=1,
        help='The second benchmark executable or JSON output file, which will '
             'be compared against the baseline')
    contender.add_argument(
        'filter_contender',
        metavar='filter_contender',
        type=str,
        nargs=1,
        help='The second filter, which will be compared against the baseline')
    parser_c.add_argument(
        'benchmark_options',
        metavar='benchmark_options',
        nargs=argparse.REMAINDER,
        help='Arguments to pass when running benchmark executables')

    return parser
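
# Sketch of the resulting namespace (file names are hypothetical, and must
# exist because argparse.FileType('r') opens them):
#   create_parser().parse_args(['benchmarks', 'a.json', 'b.json', '--', 'i'])
# yields mode='benchmarks', test_baseline/test_contender as one-element lists
# of open file objects, and benchmark_options=['i'].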


def main():
    # Parse the command line flags
    parser = create_parser()
    args, unknown_args = parser.parse_known_args()
    if args.mode is None:
        parser.print_help()
        exit(1)
    assert not unknown_args
    benchmark_options = args.benchmark_options

    if args.mode == 'benchmarks':
        test_baseline = args.test_baseline[0].name
        test_contender = args.test_contender[0].name
        filter_baseline = ''
        filter_contender = ''

        # NOTE: if test_baseline == test_contender, you are analyzing the
        # stdev

        description = 'Comparing %s to %s' % (test_baseline, test_contender)
    elif args.mode == 'filters':
        test_baseline = args.test[0].name
        test_contender = args.test[0].name
        filter_baseline = args.filter_baseline[0]
        filter_contender = args.filter_contender[0]

        # NOTE: if filter_baseline == filter_contender, you are analyzing the
        # stdev

        description = 'Comparing %s to %s (from %s)' % (
            filter_baseline, filter_contender, args.test[0].name)
    elif args.mode == 'benchmarksfiltered':
        test_baseline = args.test_baseline[0].name
        test_contender = args.test_contender[0].name
        filter_baseline = args.filter_baseline[0]
        filter_contender = args.filter_contender[0]

        # NOTE: if test_baseline == test_contender and
        # filter_baseline == filter_contender, you are analyzing the stdev

        description = 'Comparing %s (from %s) to %s (from %s)' % (
            filter_baseline, test_baseline, filter_contender, test_contender)
    else:
        # should never happen
        print("Unrecognized mode of operation: '%s'" % args.mode)
        parser.print_help()
        exit(1)

    check_inputs(test_baseline, test_contender, benchmark_options)

    options_baseline = []
    options_contender = []

    if filter_baseline and filter_contender:
        options_baseline = ['--benchmark_filter=%s' % filter_baseline]
        options_contender = ['--benchmark_filter=%s' % filter_contender]

    # Run the benchmarks and report the results
    json1 = json1_orig = gbench.util.run_or_load_benchmark(
        test_baseline, benchmark_options + options_baseline)
    json2 = json2_orig = gbench.util.run_or_load_benchmark(
        test_contender, benchmark_options + options_contender)

    # Now, filter the benchmarks so that the difference report can work
    if filter_baseline and filter_contender:
        replacement = '[%s vs. %s]' % (filter_baseline, filter_contender)
        json1 = gbench.report.filter_benchmark(
            json1_orig, filter_baseline, replacement)
        json2 = gbench.report.filter_benchmark(
            json2_orig, filter_contender, replacement)

    # Diff and output
    output_lines = gbench.report.generate_difference_report(json1, json2)
    print(description)
    for ln in output_lines:
        print(ln)


import unittest


class TestParser(unittest.TestCase):

    def setUp(self):
        self.parser = create_parser()
        testInputs = os.path.join(
            os.path.dirname(
                os.path.realpath(__file__)),
            'gbench',
            'Inputs')
        self.testInput0 = os.path.join(testInputs, 'test1_run1.json')
        self.testInput1 = os.path.join(testInputs, 'test1_run2.json')

    def test_benchmarks_basic(self):
        parsed = self.parser.parse_args(
            ['benchmarks', self.testInput0, self.testInput1])
        self.assertEqual(parsed.mode, 'benchmarks')
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertFalse(parsed.benchmark_options)

    def test_benchmarks_with_remainder(self):
        parsed = self.parser.parse_args(
            ['benchmarks', self.testInput0, self.testInput1, 'd'])
        self.assertEqual(parsed.mode, 'benchmarks')
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertEqual(parsed.benchmark_options, ['d'])

    def test_benchmarks_with_remainder_after_doubleminus(self):
        parsed = self.parser.parse_args(
            ['benchmarks', self.testInput0, self.testInput1, '--', 'e'])
        self.assertEqual(parsed.mode, 'benchmarks')
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertEqual(parsed.benchmark_options, ['e'])

    def test_filters_basic(self):
        parsed = self.parser.parse_args(
            ['filters', self.testInput0, 'c', 'd'])
        self.assertEqual(parsed.mode, 'filters')
        self.assertEqual(parsed.test[0].name, self.testInput0)
        self.assertEqual(parsed.filter_baseline[0], 'c')
        self.assertEqual(parsed.filter_contender[0], 'd')
        self.assertFalse(parsed.benchmark_options)

    def test_filters_with_remainder(self):
        parsed = self.parser.parse_args(
            ['filters', self.testInput0, 'c', 'd', 'e'])
        self.assertEqual(parsed.mode, 'filters')
        self.assertEqual(parsed.test[0].name, self.testInput0)
        self.assertEqual(parsed.filter_baseline[0], 'c')
        self.assertEqual(parsed.filter_contender[0], 'd')
        self.assertEqual(parsed.benchmark_options, ['e'])

    def test_filters_with_remainder_after_doubleminus(self):
        parsed = self.parser.parse_args(
            ['filters', self.testInput0, 'c', 'd', '--', 'f'])
        self.assertEqual(parsed.mode, 'filters')
        self.assertEqual(parsed.test[0].name, self.testInput0)
        self.assertEqual(parsed.filter_baseline[0], 'c')
        self.assertEqual(parsed.filter_contender[0], 'd')
        self.assertEqual(parsed.benchmark_options, ['f'])

    def test_benchmarksfiltered_basic(self):
        parsed = self.parser.parse_args(
            ['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e'])
        self.assertEqual(parsed.mode, 'benchmarksfiltered')
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.filter_baseline[0], 'c')
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertEqual(parsed.filter_contender[0], 'e')
        self.assertFalse(parsed.benchmark_options)

    def test_benchmarksfiltered_with_remainder(self):
        parsed = self.parser.parse_args(
            ['benchmarksfiltered', self.testInput0, 'c', self.testInput1,
             'e', 'f'])
        self.assertEqual(parsed.mode, 'benchmarksfiltered')
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.filter_baseline[0], 'c')
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertEqual(parsed.filter_contender[0], 'e')
        self.assertEqual(parsed.benchmark_options[0], 'f')

    def test_benchmarksfiltered_with_remainder_after_doubleminus(self):
        parsed = self.parser.parse_args(
            ['benchmarksfiltered', self.testInput0, 'c', self.testInput1,
             'e', '--', 'g'])
        self.assertEqual(parsed.mode, 'benchmarksfiltered')
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.filter_baseline[0], 'c')
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertEqual(parsed.filter_contender[0], 'e')
        self.assertEqual(parsed.benchmark_options[0], 'g')
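
# To run the parser tests instead of the tool itself, either swap the
# commented-out unittest.main() call below with main(), or (assuming this
# file is importable as 'compare') run: python -m unittest compare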


if __name__ == '__main__':
    # unittest.main()
    main()

# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
# kate: indent-mode python; remove-trailing-spaces modified;