1//===-- Analyze benchmark JSON files --------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// This code analyzes the json file produced by the `automemcpy` binary.
9//
10// As a remainder, `automemcpy` will benchmark each autogenerated memory
11// functions against one of the predefined distributions available in the
12// `libc/benchmarks/distributions` folder.
13//
14// It works as follows:
15// - Reads one or more json files.
16// - If there are several runs for the same function and distribution, picks the
17// median throughput (aka `BytesPerSecond`).
18// - Aggregates the throughput per distributions and scores them from worst (0)
19// to best (1).
20// - Each distribution categorizes each function into one of the following
21// categories: EXCELLENT, VERY_GOOD, GOOD, PASSABLE, INADEQUATE, MEDIOCRE,
22// BAD.
23// - A process similar to the Majority Judgment voting system is used to `elect`
24// the best function. The histogram of grades is returned so we can
25// distinguish between functions with the same final grade. In the following
26// example both functions grade EXCELLENT but we may prefer the second one.
27//
28// | | EXCELLENT | VERY_GOOD | GOOD | PASSABLE | ...
29// |------------|-----------|-----------|------|----------| ...
30// | Function_1 | 7 | 1 | 2 | | ...
31// | Function_2 | 6 | 4 | | | ...
32
33#include "automemcpy/ResultAnalyzer.h"
34#include "llvm/ADT/StringRef.h"
35#include <numeric>
36#include <unordered_map>
37
38namespace llvm {
39
40namespace automemcpy {
41
42StringRef Grade::getString(const GradeEnum &GE) {
43 switch (GE) {
44 case EXCELLENT:
45 return "EXCELLENT";
46 case VERY_GOOD:
47 return "VERY_GOOD";
48 case GOOD:
49 return "GOOD";
50 case PASSABLE:
51 return "PASSABLE";
52 case INADEQUATE:
53 return "INADEQUATE";
54 case MEDIOCRE:
55 return "MEDIOCRE";
56 case BAD:
57 return "BAD";
58 case ARRAY_SIZE:
59 report_fatal_error("logic error");
60 }
61}
62
63Grade::GradeEnum Grade::judge(double Score) {
64 if (Score >= 6. / 7)
65 return EXCELLENT;
66 if (Score >= 5. / 7)
67 return VERY_GOOD;
68 if (Score >= 4. / 7)
69 return GOOD;
70 if (Score >= 3. / 7)
71 return PASSABLE;
72 if (Score >= 2. / 7)
73 return INADEQUATE;
74 if (Score >= 1. / 7)
75 return MEDIOCRE;
76 return BAD;
77}
78
79static double computeUnbiasedSampleVariance(const std::vector<double> &Samples,
80 const double SampleMean) {
81 assert(!Samples.empty());
82 if (Samples.size() == 1)
83 return 0;
84 double DiffSquaresSum = 0;
85 for (const double S : Samples) {
86 const double Diff = S - SampleMean;
87 DiffSquaresSum += Diff * Diff;
88 }
89 return DiffSquaresSum / (Samples.size() - 1);
90}
91
92static void processPerDistributionData(PerDistributionData &Data) {
93 auto &Samples = Data.BytesPerSecondSamples;
94 assert(!Samples.empty());
95 // Sample Mean
96 const double Sum = std::accumulate(Samples.begin(), Samples.end(), 0.0);
97 Data.BytesPerSecondMean = Sum / Samples.size();
98 // Unbiased Sample Variance
99 Data.BytesPerSecondVariance =
100 computeUnbiasedSampleVariance(Samples, Data.BytesPerSecondMean);
101 // Median
102 const size_t HalfSize = Samples.size() / 2;
103 std::nth_element(Samples.begin(), Samples.begin() + HalfSize, Samples.end());
104 Data.BytesPerSecondMedian = Samples[HalfSize];
105}
106
107std::vector<FunctionData> getThroughputs(ArrayRef<Sample> Samples) {
108 std::unordered_map<FunctionId, FunctionData, FunctionId::Hasher> Functions;
109 for (const auto &S : Samples) {
110 if (S.Type != SampleType::ITERATION)
111 break;
112 auto &Function = Functions[S.Id.Function];
113 auto &Data = Function.PerDistributionData[S.Id.Distribution.Name];
114 Data.BytesPerSecondSamples.push_back(S.BytesPerSecond);
115 }
116
117 std::vector<FunctionData> Output;
118 for (auto &[FunctionId, Function] : Functions) {
119 Function.Id = FunctionId;
120 for (auto &Pair : Function.PerDistributionData)
121 processPerDistributionData(Pair.second);
122 Output.push_back(std::move(Function));
123 }
124 return Output;
125}
126
127void fillScores(MutableArrayRef<FunctionData> Functions) {
128 // A key to bucket throughput per function type and distribution.
129 struct Key {
130 FunctionType Type;
131 StringRef Distribution;
132
133 COMPARABLE_AND_HASHABLE(Key, Type, Distribution)
134 };
135
136 // Tracks minimum and maximum values.
137 struct MinMax {
138 double Min = std::numeric_limits<double>::max();
139 double Max = std::numeric_limits<double>::min();
140 void update(double Value) {
141 if (Value < Min)
142 Min = Value;
143 if (Value > Max)
144 Max = Value;
145 }
146 double normalize(double Value) const { return (Value - Min) / (Max - Min); }
147 };
148
149 std::unordered_map<Key, MinMax, Key::Hasher> ThroughputMinMax;
150 for (const auto &Function : Functions) {
151 const FunctionType Type = Function.Id.Type;
152 for (const auto &Pair : Function.PerDistributionData) {
153 const auto &Distribution = Pair.getKey();
154 const double Throughput = Pair.getValue().BytesPerSecondMedian;
155 const Key K{Type, Distribution};
156 ThroughputMinMax[K].update(Throughput);
157 }
158 }
159
160 for (auto &Function : Functions) {
161 const FunctionType Type = Function.Id.Type;
162 for (const auto &Pair : Function.PerDistributionData) {
163 const auto &Distribution = Pair.getKey();
164 const double Throughput = Pair.getValue().BytesPerSecondMedian;
165 const Key K{Type, Distribution};
166 Function.PerDistributionData[Distribution].Score =
167 ThroughputMinMax[K].normalize(Throughput);
168 }
169 }
170}
171
172void castVotes(MutableArrayRef<FunctionData> Functions) {
173 for (FunctionData &Function : Functions) {
174 Function.ScoresGeoMean = 1.0;
175 for (const auto &Pair : Function.PerDistributionData) {
176 const StringRef Distribution = Pair.getKey();
177 const double Score = Pair.getValue().Score;
178 Function.ScoresGeoMean *= Score;
179 const auto G = Grade::judge(Score);
180 ++(Function.GradeHisto[G]);
181 Function.PerDistributionData[Distribution].Grade = G;
182 }
183 }
184
185 for (FunctionData &Function : Functions) {
186 const auto &GradeHisto = Function.GradeHisto;
187 const size_t Votes =
188 std::accumulate(GradeHisto.begin(), GradeHisto.end(), 0U);
189 const size_t MedianVote = Votes / 2;
190 size_t CountedVotes = 0;
191 Grade::GradeEnum MedianGrade = Grade::BAD;
192 for (size_t I = 0; I < GradeHisto.size(); ++I) {
193 CountedVotes += GradeHisto[I];
194 if (CountedVotes > MedianVote) {
195 MedianGrade = Grade::GradeEnum(I);
196 break;
197 }
198 }
199 Function.FinalGrade = MedianGrade;
200 }
201}
202
203} // namespace automemcpy
204} // namespace llvm
205

source code of libc/benchmarks/automemcpy/lib/ResultAnalyzer.cpp