1 | //===-- Generate random but valid function descriptors -------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "automemcpy/RandomFunctionGenerator.h" |
10 | |
11 | #include <llvm/ADT/StringRef.h> |
12 | #include <llvm/Support/raw_ostream.h> |
13 | |
14 | #include <optional> |
15 | #include <set> |
16 | |
17 | namespace llvm { |
18 | namespace automemcpy { |
19 | |
20 | // Exploration parameters |
21 | // ---------------------- |
// Here we define a set of values that constrain the exploration and limit
// combinatorial explosion.
24 | |
// We limit the dedicated handling of individual sizes to sizes up to 4.
// Handling more individual sizes brings little benefit over the overlapping
// strategy.
27 | static constexpr int kMaxIndividualSize = 4; |
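// For illustration: with kMaxIndividualSize == 4, sizes 0, 1, 2, 3 and 4 each
// get their own dedicated code path.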
28 | |
// We limit the Overlapping Strategy to sizes up to 256.
// An overlap of 256B means accessing 128B at once, which is usually not
// feasible on current CPUs. We rely on the compiler to generate multiple
// loads/stores if needed, but higher sizes are unlikely to benefit from
// hardware acceleration.
34 | static constexpr int kMaxOverlapSize = 256; |
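// For illustration, the overlap strategy covers a range of sizes with two
// possibly-overlapping fixed-size accesses, one anchored at the start and one
// at the end of the buffer. A minimal sketch for Overlap<8> (hypothetical
// helper, not part of the generator):
//
//   static void CopyWithOverlap8(char *Dst, const char *Src, size_t Size) {
//     // Valid for Size in [4, 8]; the two 4-byte copies overlap when
//     // Size < 8.
//     __builtin_memcpy(Dst, Src, 4);
//     __builtin_memcpy(Dst + Size - 4, Src + Size - 4, 4);
//   }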
35 | |
36 | // For the loop strategies, we make sure that they iterate at least a certain |
37 | // number of times to amortize the cost of looping. |
38 | static constexpr int kLoopMinIter = 3; |
39 | static constexpr int kAlignedLoopMinIter = 2; |
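// For example, with kLoopMinIter == 3 and a 16-byte block, the loop region can
// only start above 3 * 16 = 48 bytes (see addLoopConstraints below).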
40 | |
41 | // We restrict the size of the block of data to handle in a loop. |
// Generally speaking, block sizes <= 16 perform poorly.
43 | static constexpr int kLoopBlockSize[] = {16, 32, 64}; |
44 | |
45 | // We restrict alignment to the following values. |
46 | static constexpr int kLoopAlignments[] = {16, 32, 64}; |
47 | |
// We make sure that each region bound is one of the following anchor values.
49 | static constexpr int kAnchors[] = {0, 1, 2, 4, 8, 16, 32, 48, |
50 | 64, 96, 128, 256, 512, 1024, kMaxSize}; |
51 | |
52 | // We also allow disabling loops, aligned loops and accelerators. |
53 | static constexpr bool kDisableLoop = false; |
54 | static constexpr bool kDisableAlignedLoop = false; |
55 | static constexpr bool kDisableAccelerator = false; |
56 | |
57 | // For memcpy, we can also explore whether aligning on source or destination has |
58 | // an effect. |
59 | static constexpr bool kExploreAlignmentArg = true; |
60 | |
// The functions we generate code for.
62 | // BCMP is specifically disabled for now. |
63 | static constexpr int kFunctionTypes[] = { |
64 | (int)FunctionType::MEMCPY, |
65 | (int)FunctionType::MEMCMP, |
66 | // (int)FunctionType::BCMP, |
67 | (int)FunctionType::MEMSET, |
68 | (int)FunctionType::BZERO, |
69 | }; |
70 | |
// The actual implementation of each function can be handled via primitive
// types (SCALAR), vector types where available (NATIVE), or by the compiler
// (BUILTIN). We want to move toward delegating the code generation entirely to
// the compiler, but for now we have to make use of per-microarchitecture
// custom implementations. Scalar is more portable but also less performant, so
// we disable it as well.
77 | static constexpr int kElementClasses[] = { |
78 | // (int)ElementTypeClass::SCALAR, |
79 | (int)ElementTypeClass::NATIVE, |
80 | // (int)ElementTypeClass::BUILTIN |
81 | }; |
82 | |
83 | RandomFunctionGenerator::RandomFunctionGenerator() |
    : Solver(Context), Type(Context.int_const("Type")),
      ContiguousBegin(Context.int_const("ContiguousBegin")),
      ContiguousEnd(Context.int_const("ContiguousEnd")),
      OverlapBegin(Context.int_const("OverlapBegin")),
      OverlapEnd(Context.int_const("OverlapEnd")),
      LoopBegin(Context.int_const("LoopBegin")),
      LoopEnd(Context.int_const("LoopEnd")),
      LoopBlockSize(Context.int_const("LoopBlockSize")),
      AlignedLoopBegin(Context.int_const("AlignedLoopBegin")),
      AlignedLoopEnd(Context.int_const("AlignedLoopEnd")),
      AlignedLoopBlockSize(Context.int_const("AlignedLoopBlockSize")),
      AlignedAlignment(Context.int_const("AlignedAlignment")),
      AlignedArg(Context.int_const("AlignedArg")),
      AcceleratorBegin(Context.int_const("AcceleratorBegin")),
      AcceleratorEnd(Context.int_const("AcceleratorEnd")),
      ElementClass(Context.int_const("ElementClass")) {
100 | // All possible functions. |
101 | Solver.add(inSetConstraint(Type, kFunctionTypes)); |
102 | |
103 | // Add constraints for region bounds. |
104 | addBoundsAndAnchors(ContiguousBegin, ContiguousEnd); |
105 | addBoundsAndAnchors(OverlapBegin, OverlapEnd); |
106 | addBoundsAndAnchors(LoopBegin, LoopEnd); |
107 | addBoundsAndAnchors(AlignedLoopBegin, AlignedLoopEnd); |
108 | addBoundsAndAnchors(AcceleratorBegin, AcceleratorEnd); |
109 | // We always consider strategies in this order, and we |
110 | // always end with the `Accelerator` strategy, as it's typically more |
111 | // efficient for large sizes. |
112 | // Contiguous <= Overlap <= Loop <= AlignedLoop <= Accelerator |
113 | Solver.add(ContiguousEnd == OverlapBegin); |
114 | Solver.add(OverlapEnd == LoopBegin); |
115 | Solver.add(LoopEnd == AlignedLoopBegin); |
116 | Solver.add(AlignedLoopEnd == AcceleratorBegin); |
117 | // Fix endpoints: The minimum size that we want to copy is 0, and we always |
118 | // start with the `Contiguous` strategy. The max size is `kMaxSize`. |
119 | Solver.add(ContiguousBegin == 0); |
120 | Solver.add(AcceleratorEnd == kMaxSize); |
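  // For illustration, one assignment satisfying the constraints in this
  // constructor (for memcpy) could partition [0, kMaxSize) as:
  //   Contiguous  [0, 4)
  //   Overlap     [4, 256)
  //   Loop        [256, 512)
  //   AlignedLoop [512, 1024)
  //   Accelerator [1024, kMaxSize)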
121 | // Contiguous |
122 | Solver.add(ContiguousEnd <= kMaxIndividualSize + 1); |
123 | // Overlap |
124 | Solver.add(OverlapEnd <= kMaxOverlapSize + 1); |
  // Overlap only ever makes sense when accessing multiple bytes at a time,
  // i.e. Overlap<1> is useless.
127 | Solver.add(OverlapBegin == OverlapEnd || OverlapBegin >= 2); |
128 | // Loop |
129 | addLoopConstraints(LoopBegin, LoopEnd, LoopBlockSize, kLoopMinIter); |
130 | // Aligned Loop |
131 | addLoopConstraints(AlignedLoopBegin, AlignedLoopEnd, AlignedLoopBlockSize, |
132 | kAlignedLoopMinIter); |
133 | Solver.add(inSetConstraint(AlignedAlignment, kLoopAlignments)); |
134 | Solver.add(AlignedLoopBegin == AlignedLoopEnd || AlignedLoopBegin >= 64); |
135 | Solver.add(AlignedLoopBlockSize >= AlignedAlignment); |
136 | Solver.add(AlignedLoopBlockSize >= LoopBlockSize); |
137 | z3::expr IsMemcpy = Type == (int)FunctionType::MEMCPY; |
138 | z3::expr ExploreAlignment = IsMemcpy && kExploreAlignmentArg; |
139 | Solver.add( |
140 | (ExploreAlignment && |
141 | inSetConstraint(AlignedArg, {(int)AlignArg::_1, (int)AlignArg::_2})) || |
142 | (!ExploreAlignment && AlignedArg == (int)AlignArg::_1)); |
  // Accelerator
  // Only memcpy has an accelerator for now.
  Solver.add(IsMemcpy || (AcceleratorBegin == AcceleratorEnd));
147 | // Element classes |
148 | Solver.add(inSetConstraint(ElementClass, kElementClasses)); |
149 | |
150 | if (kDisableLoop) |
151 | Solver.add(LoopBegin == LoopEnd); |
152 | if (kDisableAlignedLoop) |
153 | Solver.add(AlignedLoopBegin == AlignedLoopEnd); |
154 | if (kDisableAccelerator) |
155 | Solver.add(AcceleratorBegin == AcceleratorEnd); |
156 | } |
157 | |
// Creates a SizeSpan from Begin/End values.
// Returns std::nullopt if Begin == End.
160 | static std::optional<SizeSpan> AsSizeSpan(size_t Begin, size_t End) { |
161 | if (Begin == End) |
162 | return std::nullopt; |
163 | SizeSpan SS; |
164 | SS.Begin = Begin; |
165 | SS.End = End; |
166 | return SS; |
167 | } |
168 | |
// Generic helper to create a `Region` struct with a Span, or std::nullopt if
// the span is empty.
171 | template <typename Region> |
172 | static std::optional<Region> As(size_t Begin, size_t End) { |
173 | if (auto Span = AsSizeSpan(Begin, End)) { |
174 | Region Output; |
175 | Output.Span = *Span; |
176 | return Output; |
177 | } |
178 | return std::nullopt; |
179 | } |
180 | |
// Returns a Loop struct, or std::nullopt if the span is empty.
182 | static std::optional<Loop> AsLoop(size_t Begin, size_t End, size_t BlockSize) { |
183 | if (auto Span = AsSizeSpan(Begin, End)) { |
184 | Loop Output; |
185 | Output.Span = *Span; |
186 | Output.BlockSize = BlockSize; |
187 | return Output; |
188 | } |
189 | return std::nullopt; |
190 | } |
191 | |
// Returns an AlignedLoop struct, or std::nullopt if the span is empty.
193 | static std::optional<AlignedLoop> AsAlignedLoop(size_t Begin, size_t End, |
194 | size_t BlockSize, |
195 | size_t Alignment, |
196 | AlignArg AlignTo) { |
197 | if (auto Loop = AsLoop(Begin, End, BlockSize)) { |
198 | AlignedLoop Output; |
199 | Output.Loop = *Loop; |
200 | Output.Alignment = Alignment; |
201 | Output.AlignTo = AlignTo; |
202 | return Output; |
203 | } |
204 | return std::nullopt; |
205 | } |
206 | |
207 | std::optional<FunctionDescriptor> RandomFunctionGenerator::next() { |
208 | if (Solver.check() != z3::sat) |
209 | return {}; |
210 | |
211 | z3::model m = Solver.get_model(); |
212 | |
  // Helper lambda to get the current numerical value of a z3::expr.
214 | const auto E = [&m](z3::expr &V) -> int { |
215 | return m.eval(V).get_numeral_int(); |
216 | }; |
217 | |
  // Fill in the function descriptor to return.
219 | FunctionDescriptor R; |
220 | R.Type = FunctionType(E(Type)); |
221 | R.Contiguous = As<Contiguous>(E(ContiguousBegin), E(ContiguousEnd)); |
222 | R.Overlap = As<Overlap>(E(OverlapBegin), E(OverlapEnd)); |
223 | R.Loop = AsLoop(E(LoopBegin), E(LoopEnd), E(LoopBlockSize)); |
224 | R.AlignedLoop = AsAlignedLoop(E(AlignedLoopBegin), E(AlignedLoopEnd), |
225 | E(AlignedLoopBlockSize), E(AlignedAlignment), |
226 | AlignArg(E(AlignedArg))); |
227 | R.Accelerator = As<Accelerator>(E(AcceleratorBegin), E(AcceleratorEnd)); |
228 | R.ElementClass = ElementTypeClass(E(ElementClass)); |
229 | |
  // Express the current state as a set of constraints.
231 | z3::expr CurrentLayout = |
232 | (Type == E(Type)) && (ContiguousBegin == E(ContiguousBegin)) && |
233 | (ContiguousEnd == E(ContiguousEnd)) && |
234 | (OverlapBegin == E(OverlapBegin)) && (OverlapEnd == E(OverlapEnd)) && |
235 | (LoopBegin == E(LoopBegin)) && (LoopEnd == E(LoopEnd)) && |
236 | (LoopBlockSize == E(LoopBlockSize)) && |
237 | (AlignedLoopBegin == E(AlignedLoopBegin)) && |
238 | (AlignedLoopEnd == E(AlignedLoopEnd)) && |
239 | (AlignedLoopBlockSize == E(AlignedLoopBlockSize)) && |
240 | (AlignedAlignment == E(AlignedAlignment)) && |
241 | (AlignedArg == E(AlignedArg)) && |
242 | (AcceleratorBegin == E(AcceleratorBegin)) && |
243 | (AcceleratorEnd == E(AcceleratorEnd)) && |
244 | (ElementClass == E(ElementClass)); |
245 | |
  // Ask the solver to never produce this exact configuration again. Negating
  // the current model is the standard "blocking clause" technique for
  // enumerating all solutions.
247 | Solver.add(!CurrentLayout); |
248 | return R; |
249 | } |
250 | |
// Returns a constraint ensuring that `Variable` is one of the provided values.
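// For example, inSetConstraint(X, {16, 32, 64}) builds the z3 expression
// (X == 16 || X == 32 || X == 64).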
252 | z3::expr RandomFunctionGenerator::inSetConstraint(z3::expr &Variable, |
253 | ArrayRef<int> Values) const { |
254 | z3::expr_vector Args(Variable.ctx()); |
255 | for (int Value : Values) |
256 | Args.push_back(Variable == Value); |
257 | return z3::mk_or(Args); |
258 | } |
259 | |
260 | void RandomFunctionGenerator::addBoundsAndAnchors(z3::expr &Begin, |
261 | z3::expr &End) { |
262 | // Begin and End are picked amongst a set of predefined values. |
263 | Solver.add(inSetConstraint(Begin, kAnchors)); |
264 | Solver.add(inSetConstraint(End, kAnchors)); |
265 | Solver.add(Begin >= 0); |
266 | Solver.add(Begin <= End); |
267 | Solver.add(End <= kMaxSize); |
268 | } |
269 | |
270 | void RandomFunctionGenerator::addLoopConstraints(const z3::expr &LoopBegin, |
271 | const z3::expr &LoopEnd, |
272 | z3::expr &LoopBlockSize, |
273 | int LoopMinIter) { |
274 | Solver.add(inSetConstraint(LoopBlockSize, kLoopBlockSize)); |
275 | Solver.add(LoopBegin == LoopEnd || |
276 | (LoopBegin > (LoopMinIter * LoopBlockSize))); |
277 | } |
278 | |
279 | } // namespace automemcpy |
280 | } // namespace llvm |
281 | |