//===-- Analyze benchmark JSON files --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This code analyzes the JSON files produced by the `automemcpy` binary.
//
// As a reminder, `automemcpy` benchmarks each autogenerated memory function
// against one of the predefined distributions available in the
// `libc/benchmarks/distributions` folder.
//
// It works as follows:
// - Reads one or more JSON files.
// - If there are several runs for the same function and distribution, picks
//   the median throughput (aka `BytesPerSecond`).
// - Aggregates the throughput per distribution and scores each function from
//   worst (0) to best (1).
// - For each distribution, each function is then graded into one of the
//   following categories: EXCELLENT, VERY_GOOD, GOOD, PASSABLE, INADEQUATE,
//   MEDIOCRE, BAD.
// - A process similar to the Majority Judgment voting system is used to
//   `elect` the best function. The histogram of grades is returned so we can
//   distinguish between functions with the same final grade. In the following
//   example, both functions grade EXCELLENT, but we may prefer the second one.
//
// |            | EXCELLENT | VERY_GOOD | GOOD | PASSABLE | ...
// |------------|-----------|-----------|------|----------| ...
// | Function_1 |     7     |     1     |  2   |          | ...
// | Function_2 |     6     |     4     |      |          | ...

#include "automemcpy/ResultAnalyzer.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
#include <limits>
#include <numeric>
#include <unordered_map>
#include <vector>

namespace llvm {

namespace automemcpy {

StringRef Grade::getString(const GradeEnum &GE) {
  switch (GE) {
  case EXCELLENT:
    return "EXCELLENT";
  case VERY_GOOD:
    return "VERY_GOOD";
  case GOOD:
    return "GOOD";
  case PASSABLE:
    return "PASSABLE";
  case INADEQUATE:
    return "INADEQUATE";
  case MEDIOCRE:
    return "MEDIOCRE";
  case BAD:
    return "BAD";
  case ARRAY_SIZE:
    report_fatal_error("logic error");
  }
}

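// Maps a score in [0, 1] to a grade by splitting the range into seven
// equal-width buckets, one per grade. For instance judge(0.9) yields
// EXCELLENT (0.9 >= 6/7) and judge(0.5) yields PASSABLE (3/7 <= 0.5 < 4/7).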
Grade::GradeEnum Grade::judge(double Score) {
  if (Score >= 6. / 7)
    return EXCELLENT;
  if (Score >= 5. / 7)
    return VERY_GOOD;
  if (Score >= 4. / 7)
    return GOOD;
  if (Score >= 3. / 7)
    return PASSABLE;
  if (Score >= 2. / 7)
    return INADEQUATE;
  if (Score >= 1. / 7)
    return MEDIOCRE;
  return BAD;
}

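// Computes the unbiased sample variance Sum((S - Mean)^2) / (N - 1).
// A single sample has no measurable spread, so 0 is returned in that case.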
static double computeUnbiasedSampleVariance(const std::vector<double> &Samples,
                                            const double SampleMean) {
  assert(!Samples.empty());
  if (Samples.size() == 1)
    return 0;
  double DiffSquaresSum = 0;
  for (const double S : Samples) {
    const double Diff = S - SampleMean;
    DiffSquaresSum += Diff * Diff;
  }
  return DiffSquaresSum / (Samples.size() - 1);
}

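// Computes the mean, unbiased variance, and median of the throughput samples
// for one (function, distribution) pair. Note that std::nth_element reorders
// the samples and, for an even sample count, selects the upper of the two
// middle values rather than their average.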
static void processPerDistributionData(PerDistributionData &Data) {
  auto &Samples = Data.BytesPerSecondSamples;
  assert(!Samples.empty());
  // Sample Mean
  const double Sum = std::accumulate(Samples.begin(), Samples.end(), 0.0);
  Data.BytesPerSecondMean = Sum / Samples.size();
  // Unbiased Sample Variance
  Data.BytesPerSecondVariance =
      computeUnbiasedSampleVariance(Samples, Data.BytesPerSecondMean);
  // Median
  const size_t HalfSize = Samples.size() / 2;
  std::nth_element(Samples.begin(), Samples.begin() + HalfSize, Samples.end());
  Data.BytesPerSecondMedian = Samples[HalfSize];
}

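// Groups the individual iteration samples by function and distribution, then
// derives the per-distribution statistics (mean, variance, median throughput).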
std::vector<FunctionData> getThroughputs(ArrayRef<Sample> Samples) {
  std::unordered_map<FunctionId, FunctionData, FunctionId::Hasher> Functions;
  for (const auto &S : Samples) {
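    // Aggregate samples (e.g. the mean / median rows emitted by the benchmark
    // framework) are assumed to appear only after all ITERATION samples, so
    // processing stops at the first non-ITERATION sample.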
    if (S.Type != SampleType::ITERATION)
      break;
    auto &Function = Functions[S.Id.Function];
    auto &Data = Function.PerDistributionData[S.Id.Distribution.Name];
    Data.BytesPerSecondSamples.push_back(S.BytesPerSecond);
  }

  std::vector<FunctionData> Output;
  for (auto &[FunctionId, Function] : Functions) {
    Function.Id = FunctionId;
    for (auto &Pair : Function.PerDistributionData)
      processPerDistributionData(Pair.second);
    Output.push_back(std::move(Function));
  }
  return Output;
}

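// For each (function type, distribution) pair, linearly rescales the median
// throughput so that the slowest function gets a score of 0 and the fastest a
// score of 1.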
void fillScores(MutableArrayRef<FunctionData> Functions) {
  // A key to bucket throughput per function type and distribution.
  struct Key {
    FunctionType Type;
    StringRef Distribution;

    COMPARABLE_AND_HASHABLE(Key, Type, Distribution)
  };

  // Tracks minimum and maximum values.
  struct MinMax {
    double Min = std::numeric_limits<double>::max();
    // lowest() (and not min(), which is the smallest positive double) so that
    // the first call to update() always sets Max.
    double Max = std::numeric_limits<double>::lowest();
    void update(double Value) {
      if (Value < Min)
        Min = Value;
      if (Value > Max)
        Max = Value;
    }
    double normalize(double Value) const { return (Value - Min) / (Max - Min); }
  };

  std::unordered_map<Key, MinMax, Key::Hasher> ThroughputMinMax;
  for (const auto &Function : Functions) {
    const FunctionType Type = Function.Id.Type;
    for (const auto &Pair : Function.PerDistributionData) {
      const auto &Distribution = Pair.getKey();
      const double Throughput = Pair.getValue().BytesPerSecondMedian;
      const Key K{Type, Distribution};
      ThroughputMinMax[K].update(Throughput);
    }
  }

  for (auto &Function : Functions) {
    const FunctionType Type = Function.Id.Type;
    for (const auto &Pair : Function.PerDistributionData) {
      const auto &Distribution = Pair.getKey();
      const double Throughput = Pair.getValue().BytesPerSecondMedian;
      const Key K{Type, Distribution};
      Function.PerDistributionData[Distribution].Score =
          ThroughputMinMax[K].normalize(Throughput);
    }
  }
}

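// Grades each function per distribution and elects its final grade, Majority
// Judgment style, as the median of the grades it received.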
void castVotes(MutableArrayRef<FunctionData> Functions) {
  for (FunctionData &Function : Functions) {
    Function.ScoresGeoMean = 1.0;
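    // Note: ScoresGeoMean only accumulates the product of the scores; the Nth
    // root is presumably omitted because the product is a monotonic function
    // of the geometric mean and therefore ranks functions identically.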
    for (const auto &Pair : Function.PerDistributionData) {
      const StringRef Distribution = Pair.getKey();
      const double Score = Pair.getValue().Score;
      Function.ScoresGeoMean *= Score;
      const auto G = Grade::judge(Score);
      ++(Function.GradeHisto[G]);
      Function.PerDistributionData[Distribution].Grade = G;
    }
  }

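  // The final grade is the median grade: walk the histogram from best
  // (EXCELLENT) to worst (BAD) until more than half of the votes have been
  // counted.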
  for (FunctionData &Function : Functions) {
    const auto &GradeHisto = Function.GradeHisto;
    const size_t Votes =
        std::accumulate(GradeHisto.begin(), GradeHisto.end(), size_t{0});
    const size_t MedianVote = Votes / 2;
    size_t CountedVotes = 0;
    Grade::GradeEnum MedianGrade = Grade::BAD;
    for (size_t I = 0; I < GradeHisto.size(); ++I) {
      CountedVotes += GradeHisto[I];
      if (CountedVotes > MedianVote) {
        MedianGrade = Grade::GradeEnum(I);
        break;
      }
    }
    Function.FinalGrade = MedianGrade;
  }
}

} // namespace automemcpy
} // namespace llvm