1//===-- Generate random but valid function descriptors -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "automemcpy/RandomFunctionGenerator.h"
10
11#include <llvm/ADT/StringRef.h>
12#include <llvm/Support/raw_ostream.h>
13
14#include <optional>
15#include <set>
16
17namespace llvm {
18namespace automemcpy {
19
20// Exploration parameters
21// ----------------------
22// Here we define a set of values that will contraint the exploration and
23// limit combinatorial explosion.
24
25// We limit the number of cases for individual sizes to sizes up to 4.
26// More individual sizes don't bring much over the overlapping strategy.
27static constexpr int kMaxIndividualSize = 4;
28
29// We limit Overlapping Strategy to sizes up to 256.
30// An overlap of 256B means accessing 128B at once which is usually not
31// feasible by current CPUs. We rely on the compiler to generate multiple
32// loads/stores if needed but higher sizes are unlikely to benefit from hardware
33// acceleration.
34static constexpr int kMaxOverlapSize = 256;
35
36// For the loop strategies, we make sure that they iterate at least a certain
37// number of times to amortize the cost of looping.
38static constexpr int kLoopMinIter = 3;
39static constexpr int kAlignedLoopMinIter = 2;
40
41// We restrict the size of the block of data to handle in a loop.
42// Generally speaking block size <= 16 perform poorly.
43static constexpr int kLoopBlockSize[] = {16, 32, 64};
44
45// We restrict alignment to the following values.
46static constexpr int kLoopAlignments[] = {16, 32, 64};
47
48// We make sure that the region bounds are one of the following values.
49static constexpr int kAnchors[] = {0, 1, 2, 4, 8, 16, 32, 48,
50 64, 96, 128, 256, 512, 1024, kMaxSize};
51
52// We also allow disabling loops, aligned loops and accelerators.
53static constexpr bool kDisableLoop = false;
54static constexpr bool kDisableAlignedLoop = false;
55static constexpr bool kDisableAccelerator = false;
56
57// For memcpy, we can also explore whether aligning on source or destination has
58// an effect.
59static constexpr bool kExploreAlignmentArg = true;
60
61// The function we generate code for.
62// BCMP is specifically disabled for now.
63static constexpr int kFunctionTypes[] = {
64 (int)FunctionType::MEMCPY,
65 (int)FunctionType::MEMCMP,
66 // (int)FunctionType::BCMP,
67 (int)FunctionType::MEMSET,
68 (int)FunctionType::BZERO,
69};
70
71// The actual implementation of each function can be handled via primitive types
72// (SCALAR), vector types where available (NATIVE) or by the compiler (BUILTIN).
73// We want to move toward delegating the code generation entirely to the
74// compiler but for now we have to make use of -per microarchitecture- custom
75// implementations. Scalar being more portable but also less performant, we
76// remove it as well.
77static constexpr int kElementClasses[] = {
78 // (int)ElementTypeClass::SCALAR,
79 (int)ElementTypeClass::NATIVE,
80 // (int)ElementTypeClass::BUILTIN
81};
82
83RandomFunctionGenerator::RandomFunctionGenerator()
84 : Solver(Context), Type(Context.int_const("Type")),
85 ContiguousBegin(Context.int_const("ContiguousBegin")),
86 ContiguousEnd(Context.int_const("ContiguousEnd")),
87 OverlapBegin(Context.int_const("OverlapBegin")),
88 OverlapEnd(Context.int_const("OverlapEnd")),
89 LoopBegin(Context.int_const("LoopBegin")),
90 LoopEnd(Context.int_const("LoopEnd")),
91 LoopBlockSize(Context.int_const("LoopBlockSize")),
92 AlignedLoopBegin(Context.int_const("AlignedLoopBegin")),
93 AlignedLoopEnd(Context.int_const("AlignedLoopEnd")),
94 AlignedLoopBlockSize(Context.int_const("AlignedLoopBlockSize")),
95 AlignedAlignment(Context.int_const("AlignedAlignment")),
96 AlignedArg(Context.int_const("AlignedArg")),
97 AcceleratorBegin(Context.int_const("AcceleratorBegin")),
98 AcceleratorEnd(Context.int_const("AcceleratorEnd")),
99 ElementClass(Context.int_const("ElementClass")) {
100 // All possible functions.
101 Solver.add(inSetConstraint(Type, kFunctionTypes));
102
103 // Add constraints for region bounds.
104 addBoundsAndAnchors(ContiguousBegin, ContiguousEnd);
105 addBoundsAndAnchors(OverlapBegin, OverlapEnd);
106 addBoundsAndAnchors(LoopBegin, LoopEnd);
107 addBoundsAndAnchors(AlignedLoopBegin, AlignedLoopEnd);
108 addBoundsAndAnchors(AcceleratorBegin, AcceleratorEnd);
109 // We always consider strategies in this order, and we
110 // always end with the `Accelerator` strategy, as it's typically more
111 // efficient for large sizes.
112 // Contiguous <= Overlap <= Loop <= AlignedLoop <= Accelerator
113 Solver.add(ContiguousEnd == OverlapBegin);
114 Solver.add(OverlapEnd == LoopBegin);
115 Solver.add(LoopEnd == AlignedLoopBegin);
116 Solver.add(AlignedLoopEnd == AcceleratorBegin);
117 // Fix endpoints: The minimum size that we want to copy is 0, and we always
118 // start with the `Contiguous` strategy. The max size is `kMaxSize`.
119 Solver.add(ContiguousBegin == 0);
120 Solver.add(AcceleratorEnd == kMaxSize);
121 // Contiguous
122 Solver.add(ContiguousEnd <= kMaxIndividualSize + 1);
123 // Overlap
124 Solver.add(OverlapEnd <= kMaxOverlapSize + 1);
125 // Overlap only ever makes sense when accessing multiple bytes at a time.
126 // i.e. Overlap<1> is useless.
127 Solver.add(OverlapBegin == OverlapEnd || OverlapBegin >= 2);
128 // Loop
129 addLoopConstraints(LoopBegin, LoopEnd, LoopBlockSize, kLoopMinIter);
130 // Aligned Loop
131 addLoopConstraints(AlignedLoopBegin, AlignedLoopEnd, AlignedLoopBlockSize,
132 kAlignedLoopMinIter);
133 Solver.add(inSetConstraint(AlignedAlignment, kLoopAlignments));
134 Solver.add(AlignedLoopBegin == AlignedLoopEnd || AlignedLoopBegin >= 64);
135 Solver.add(AlignedLoopBlockSize >= AlignedAlignment);
136 Solver.add(AlignedLoopBlockSize >= LoopBlockSize);
137 z3::expr IsMemcpy = Type == (int)FunctionType::MEMCPY;
138 z3::expr ExploreAlignment = IsMemcpy && kExploreAlignmentArg;
139 Solver.add(
140 (ExploreAlignment &&
141 inSetConstraint(AlignedArg, {(int)AlignArg::_1, (int)AlignArg::_2})) ||
142 (!ExploreAlignment && AlignedArg == (int)AlignArg::_1));
143 // Accelerator
144 Solver.add(IsMemcpy ||
145 (AcceleratorBegin ==
146 AcceleratorEnd)); // Only Memcpy has accelerator for now.
147 // Element classes
148 Solver.add(inSetConstraint(ElementClass, kElementClasses));
149
150 if (kDisableLoop)
151 Solver.add(LoopBegin == LoopEnd);
152 if (kDisableAlignedLoop)
153 Solver.add(AlignedLoopBegin == AlignedLoopEnd);
154 if (kDisableAccelerator)
155 Solver.add(AcceleratorBegin == AcceleratorEnd);
156}
157
158// Creates SizeSpan from Begin/End values.
159// Returns std::nullopt if Begin==End.
160static std::optional<SizeSpan> AsSizeSpan(size_t Begin, size_t End) {
161 if (Begin == End)
162 return std::nullopt;
163 SizeSpan SS;
164 SS.Begin = Begin;
165 SS.End = End;
166 return SS;
167}
168
169// Generic method to create a `Region` struct with a Span or std::nullopt if
170// span is empty.
171template <typename Region>
172static std::optional<Region> As(size_t Begin, size_t End) {
173 if (auto Span = AsSizeSpan(Begin, End)) {
174 Region Output;
175 Output.Span = *Span;
176 return Output;
177 }
178 return std::nullopt;
179}
180
181// Returns a Loop struct or std::nullopt if span is empty.
182static std::optional<Loop> AsLoop(size_t Begin, size_t End, size_t BlockSize) {
183 if (auto Span = AsSizeSpan(Begin, End)) {
184 Loop Output;
185 Output.Span = *Span;
186 Output.BlockSize = BlockSize;
187 return Output;
188 }
189 return std::nullopt;
190}
191
192// Returns an AlignedLoop struct or std::nullopt if span is empty.
193static std::optional<AlignedLoop> AsAlignedLoop(size_t Begin, size_t End,
194 size_t BlockSize,
195 size_t Alignment,
196 AlignArg AlignTo) {
197 if (auto Loop = AsLoop(Begin, End, BlockSize)) {
198 AlignedLoop Output;
199 Output.Loop = *Loop;
200 Output.Alignment = Alignment;
201 Output.AlignTo = AlignTo;
202 return Output;
203 }
204 return std::nullopt;
205}
206
207std::optional<FunctionDescriptor> RandomFunctionGenerator::next() {
208 if (Solver.check() != z3::sat)
209 return {};
210
211 z3::model m = Solver.get_model();
212
213 // Helper method to get the current numerical value of a z3::expr.
214 const auto E = [&m](z3::expr &V) -> int {
215 return m.eval(V).get_numeral_int();
216 };
217
218 // Fill is the function descriptor to return.
219 FunctionDescriptor R;
220 R.Type = FunctionType(E(Type));
221 R.Contiguous = As<Contiguous>(E(ContiguousBegin), E(ContiguousEnd));
222 R.Overlap = As<Overlap>(E(OverlapBegin), E(OverlapEnd));
223 R.Loop = AsLoop(E(LoopBegin), E(LoopEnd), E(LoopBlockSize));
224 R.AlignedLoop = AsAlignedLoop(E(AlignedLoopBegin), E(AlignedLoopEnd),
225 E(AlignedLoopBlockSize), E(AlignedAlignment),
226 AlignArg(E(AlignedArg)));
227 R.Accelerator = As<Accelerator>(E(AcceleratorBegin), E(AcceleratorEnd));
228 R.ElementClass = ElementTypeClass(E(ElementClass));
229
230 // Express current state as a set of constraints.
231 z3::expr CurrentLayout =
232 (Type == E(Type)) && (ContiguousBegin == E(ContiguousBegin)) &&
233 (ContiguousEnd == E(ContiguousEnd)) &&
234 (OverlapBegin == E(OverlapBegin)) && (OverlapEnd == E(OverlapEnd)) &&
235 (LoopBegin == E(LoopBegin)) && (LoopEnd == E(LoopEnd)) &&
236 (LoopBlockSize == E(LoopBlockSize)) &&
237 (AlignedLoopBegin == E(AlignedLoopBegin)) &&
238 (AlignedLoopEnd == E(AlignedLoopEnd)) &&
239 (AlignedLoopBlockSize == E(AlignedLoopBlockSize)) &&
240 (AlignedAlignment == E(AlignedAlignment)) &&
241 (AlignedArg == E(AlignedArg)) &&
242 (AcceleratorBegin == E(AcceleratorBegin)) &&
243 (AcceleratorEnd == E(AcceleratorEnd)) &&
244 (ElementClass == E(ElementClass));
245
246 // Ask solver to never show this configuration ever again.
247 Solver.add(!CurrentLayout);
248 return R;
249}
250
251// Make sure `Variable` is one of the provided values.
252z3::expr RandomFunctionGenerator::inSetConstraint(z3::expr &Variable,
253 ArrayRef<int> Values) const {
254 z3::expr_vector Args(Variable.ctx());
255 for (int Value : Values)
256 Args.push_back(Variable == Value);
257 return z3::mk_or(Args);
258}
259
260void RandomFunctionGenerator::addBoundsAndAnchors(z3::expr &Begin,
261 z3::expr &End) {
262 // Begin and End are picked amongst a set of predefined values.
263 Solver.add(inSetConstraint(Begin, kAnchors));
264 Solver.add(inSetConstraint(End, kAnchors));
265 Solver.add(Begin >= 0);
266 Solver.add(Begin <= End);
267 Solver.add(End <= kMaxSize);
268}
269
270void RandomFunctionGenerator::addLoopConstraints(const z3::expr &LoopBegin,
271 const z3::expr &LoopEnd,
272 z3::expr &LoopBlockSize,
273 int LoopMinIter) {
274 Solver.add(inSetConstraint(LoopBlockSize, kLoopBlockSize));
275 Solver.add(LoopBegin == LoopEnd ||
276 (LoopBegin > (LoopMinIter * LoopBlockSize)));
277}
278
279} // namespace automemcpy
280} // namespace llvm
281

source code of libc/benchmarks/automemcpy/lib/RandomFunctionGenerator.cpp