//===-- Analyze benchmark JSON files --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This code analyzes the JSON files produced by the `automemcpy` binary.
//
// As a reminder, `automemcpy` benchmarks each autogenerated memory function
// against one of the predefined distributions available in the
// `libc/benchmarks/distributions` folder.
//
// It works as follows:
// - Reads one or more JSON files.
// - If there are several runs for the same function and distribution, picks
//   the median throughput (aka `BytesPerSecond`).
// - Aggregates the throughput per distribution and scores each function from
//   worst (0) to best (1).
// - For each distribution, each function is then graded into one of the
//   following categories: EXCELLENT, VERY_GOOD, GOOD, PASSABLE, INADEQUATE,
//   MEDIOCRE, BAD.
// - A process similar to the Majority Judgment voting system is used to
//   `elect` the best function. The histogram of grades is returned so we can
//   distinguish between functions with the same final grade. In the following
//   example, both functions grade EXCELLENT, but we may prefer the second one.
//
// |            | EXCELLENT | VERY_GOOD | GOOD | PASSABLE | ...
// |------------|-----------|-----------|------|----------| ...
// | Function_1 |     7     |     1     |  2   |          | ...
// | Function_2 |     6     |     4     |      |          | ...

#include "automemcpy/ResultAnalyzer.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
#include <limits>
#include <numeric>
#include <unordered_map>
#include <vector>

namespace llvm {

namespace automemcpy {

StringRef Grade::getString(const GradeEnum &GE) {
  switch (GE) {
  case EXCELLENT:
    return "EXCELLENT";
  case VERY_GOOD:
    return "VERY_GOOD";
  case GOOD:
    return "GOOD";
  case PASSABLE:
    return "PASSABLE";
  case INADEQUATE:
    return "INADEQUATE";
  case MEDIOCRE:
    return "MEDIOCRE";
  case BAD:
    return "BAD";
  case ARRAY_SIZE:
    report_fatal_error("logic error");
  }
}

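// Maps a score in [0, 1] to a grade by splitting the range into seven
// equal-width buckets, one per grade. For instance judge(0.9) yields
// EXCELLENT (0.9 >= 6/7) and judge(0.5) yields PASSABLE (3/7 <= 0.5 < 4/7).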
Grade::GradeEnum Grade::judge(double Score) {
  if (Score >= 6. / 7)
    return EXCELLENT;
  if (Score >= 5. / 7)
    return VERY_GOOD;
  if (Score >= 4. / 7)
    return GOOD;
  if (Score >= 3. / 7)
    return PASSABLE;
  if (Score >= 2. / 7)
    return INADEQUATE;
  if (Score >= 1. / 7)
    return MEDIOCRE;
  return BAD;
}

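// Computes the unbiased sample variance Sum((S - Mean)^2) / (N - 1).
// A single sample has no measurable spread, so 0 is returned in that case.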
static double computeUnbiasedSampleVariance(const std::vector<double> &Samples,
                                            const double SampleMean) {
  assert(!Samples.empty());
  if (Samples.size() == 1)
    return 0;
  double DiffSquaresSum = 0;
  for (const double S : Samples) {
    const double Diff = S - SampleMean;
    DiffSquaresSum += Diff * Diff;
  }
  return DiffSquaresSum / (Samples.size() - 1);
}

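// Computes the mean, unbiased variance, and median of the throughput samples
// for one (function, distribution) pair. Note that std::nth_element reorders
// the samples and, for an even sample count, selects the upper of the two
// middle values rather than their average.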
static void processPerDistributionData(PerDistributionData &Data) {
  auto &Samples = Data.BytesPerSecondSamples;
  assert(!Samples.empty());
  // Sample Mean
  const double Sum = std::accumulate(Samples.begin(), Samples.end(), 0.0);
  Data.BytesPerSecondMean = Sum / Samples.size();
  // Unbiased Sample Variance
  Data.BytesPerSecondVariance =
      computeUnbiasedSampleVariance(Samples, Data.BytesPerSecondMean);
  // Median
  const size_t HalfSize = Samples.size() / 2;
  std::nth_element(Samples.begin(), Samples.begin() + HalfSize, Samples.end());
  Data.BytesPerSecondMedian = Samples[HalfSize];
}

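// Groups the individual iteration samples by function and distribution, then
// derives the per-distribution statistics (mean, variance, median throughput).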
std::vector<FunctionData> getThroughputs(ArrayRef<Sample> Samples) {
  std::unordered_map<FunctionId, FunctionData, FunctionId::Hasher> Functions;
  for (const auto &S : Samples) {
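    // Aggregate samples (e.g. the mean / median rows emitted by the benchmark
    // framework) are assumed to appear only after all ITERATION samples, so
    // processing stops at the first non-ITERATION sample.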
    if (S.Type != SampleType::ITERATION)
      break;
    auto &Function = Functions[S.Id.Function];
    auto &Data = Function.PerDistributionData[S.Id.Distribution.Name];
    Data.BytesPerSecondSamples.push_back(S.BytesPerSecond);
  }

  std::vector<FunctionData> Output;
  for (auto &[FunctionId, Function] : Functions) {
    Function.Id = FunctionId;
    for (auto &Pair : Function.PerDistributionData)
      processPerDistributionData(Pair.second);
    Output.push_back(std::move(Function));
  }
  return Output;
}

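// For each (function type, distribution) pair, linearly rescales the median
// throughput so that the slowest function gets a score of 0 and the fastest a
// score of 1.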
void fillScores(MutableArrayRef<FunctionData> Functions) {
  // A key to bucket throughput per function type and distribution.
  struct Key {
    FunctionType Type;
    StringRef Distribution;

    COMPARABLE_AND_HASHABLE(Key, Type, Distribution)
  };

  // Tracks minimum and maximum values.
  struct MinMax {
    double Min = std::numeric_limits<double>::max();
    // lowest() (and not min(), which is the smallest positive double) so that
    // the first call to update() always sets Max.
    double Max = std::numeric_limits<double>::lowest();
    void update(double Value) {
      if (Value < Min)
        Min = Value;
      if (Value > Max)
        Max = Value;
    }
    double normalize(double Value) const { return (Value - Min) / (Max - Min); }
  };

  std::unordered_map<Key, MinMax, Key::Hasher> ThroughputMinMax;
  for (const auto &Function : Functions) {
    const FunctionType Type = Function.Id.Type;
    for (const auto &Pair : Function.PerDistributionData) {
      const auto &Distribution = Pair.getKey();
      const double Throughput = Pair.getValue().BytesPerSecondMedian;
      const Key K{Type, Distribution};
      ThroughputMinMax[K].update(Throughput);
    }
  }

  for (auto &Function : Functions) {
    const FunctionType Type = Function.Id.Type;
    for (const auto &Pair : Function.PerDistributionData) {
      const auto &Distribution = Pair.getKey();
      const double Throughput = Pair.getValue().BytesPerSecondMedian;
      const Key K{Type, Distribution};
      Function.PerDistributionData[Distribution].Score =
          ThroughputMinMax[K].normalize(Throughput);
    }
  }
}

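// Grades each function per distribution and elects its final grade, Majority
// Judgment style, as the median of the grades it received.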
void castVotes(MutableArrayRef<FunctionData> Functions) {
  for (FunctionData &Function : Functions) {
    Function.ScoresGeoMean = 1.0;
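    // Note: ScoresGeoMean only accumulates the product of the scores; the Nth
    // root is presumably omitted because the product is a monotonic function
    // of the geometric mean and therefore ranks functions identically.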
    for (const auto &Pair : Function.PerDistributionData) {
      const StringRef Distribution = Pair.getKey();
      const double Score = Pair.getValue().Score;
      Function.ScoresGeoMean *= Score;
      const auto G = Grade::judge(Score);
      ++(Function.GradeHisto[G]);
      Function.PerDistributionData[Distribution].Grade = G;
    }
  }

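  // The final grade is the median grade: walk the histogram from best
  // (EXCELLENT) to worst (BAD) until more than half of the votes have been
  // counted.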
  for (FunctionData &Function : Functions) {
    const auto &GradeHisto = Function.GradeHisto;
    const size_t Votes =
        std::accumulate(GradeHisto.begin(), GradeHisto.end(), size_t{0});
    const size_t MedianVote = Votes / 2;
    size_t CountedVotes = 0;
    Grade::GradeEnum MedianGrade = Grade::BAD;
    for (size_t I = 0; I < GradeHisto.size(); ++I) {
      CountedVotes += GradeHisto[I];
      if (CountedVotes > MedianVote) {
        MedianGrade = Grade::GradeEnum(I);
        break;
      }
    }
    Function.FinalGrade = MedianGrade;
  }
}

} // namespace automemcpy
} // namespace llvm