1 | /*===- InstrProfilingValue.c - Support library for PGO instrumentation ----===*\ |
2 | |* |
3 | |* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | |* See https://llvm.org/LICENSE.txt for license information. |
5 | |* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | |* |
7 | \*===----------------------------------------------------------------------===*/ |
8 | |
9 | #include <assert.h> |
10 | #include <limits.h> |
11 | #include <stdio.h> |
12 | #include <stdlib.h> |
13 | #include <string.h> |
14 | |
15 | #include "InstrProfiling.h" |
16 | #include "InstrProfilingInternal.h" |
17 | #include "InstrProfilingUtil.h" |
18 | |
19 | #define INSTR_PROF_VALUE_PROF_DATA |
20 | #define INSTR_PROF_COMMON_API_IMPL |
21 | #define INSTR_PROF_VALUE_PROF_MEMOP_API |
22 | #include "profile/InstrProfData.inc" |
23 | |
24 | static int hasStaticCounters = 1; |
25 | static int OutOfNodesWarnings = 0; |
26 | static int hasNonDefaultValsPerSite = 0; |
27 | #define INSTR_PROF_MAX_VP_WARNS 10 |
28 | #define INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE 24 |
29 | #define INSTR_PROF_VNODE_POOL_SIZE 1024 |
30 | |
31 | #ifndef _MSC_VER |
32 | /* A shared static pool in addition to the vnodes statically |
33 | * allocated by the compiler. */ |
34 | COMPILER_RT_VISIBILITY ValueProfNode |
35 | lprofValueProfNodes[INSTR_PROF_VNODE_POOL_SIZE] COMPILER_RT_SECTION( |
36 | COMPILER_RT_SEG INSTR_PROF_VNODES_SECT_NAME); |
37 | #endif |
38 | |
39 | COMPILER_RT_VISIBILITY uint32_t VPMaxNumValsPerSite = |
40 | INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE; |
41 | |
42 | COMPILER_RT_VISIBILITY void lprofSetupValueProfiler(void) { |
43 | const char *Str = 0; |
44 | Str = getenv(name: "LLVM_VP_MAX_NUM_VALS_PER_SITE" ); |
45 | if (Str && Str[0]) { |
46 | VPMaxNumValsPerSite = atoi(nptr: Str); |
47 | hasNonDefaultValsPerSite = 1; |
48 | } |
49 | if (VPMaxNumValsPerSite > INSTR_PROF_MAX_NUM_VAL_PER_SITE) |
50 | VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE; |
51 | } |
52 | |
53 | COMPILER_RT_VISIBILITY void lprofSetMaxValsPerSite(uint32_t MaxVals) { |
54 | VPMaxNumValsPerSite = MaxVals; |
55 | hasNonDefaultValsPerSite = 1; |
56 | } |
57 | |
58 | /* This method is only used in value profiler mock testing. */ |
59 | COMPILER_RT_VISIBILITY void |
60 | __llvm_profile_set_num_value_sites(__llvm_profile_data *Data, |
61 | uint32_t ValueKind, uint16_t NumValueSites) { |
62 | #ifdef __GNUC__ |
63 | #pragma GCC diagnostic push |
64 | #pragma GCC diagnostic ignored "-Wcast-qual" |
65 | #elif defined(__clang__) |
66 | #pragma clang diagnostic push |
67 | #pragma clang diagnostic ignored "-Wcast-qual" |
68 | #endif |
69 | *((uint16_t *)&Data->NumValueSites[ValueKind]) = NumValueSites; |
70 | #ifdef __GNUC__ |
71 | #pragma GCC diagnostic pop |
72 | #elif defined(__clang__) |
73 | #pragma clang diagnostic pop |
74 | #endif |
75 | } |
76 | |
77 | /* This method is only used in value profiler mock testing. */ |
78 | COMPILER_RT_VISIBILITY const __llvm_profile_data * |
79 | __llvm_profile_iterate_data(const __llvm_profile_data *Data) { |
80 | return Data + 1; |
81 | } |
82 | |
83 | /* This method is only used in value profiler mock testing. */ |
84 | COMPILER_RT_VISIBILITY void * |
85 | __llvm_get_function_addr(const __llvm_profile_data *Data) { |
86 | return Data->FunctionPointer; |
87 | } |
88 | |
89 | /* Allocate an array that holds the pointers to the linked lists of |
90 | * value profile counter nodes. The number of element of the array |
91 | * is the total number of value profile sites instrumented. Returns |
92 | * 0 if allocation fails. |
93 | */ |
94 | |
95 | static int allocateValueProfileCounters(__llvm_profile_data *Data) { |
96 | uint64_t NumVSites = 0; |
97 | uint32_t VKI; |
98 | |
99 | /* This function will never be called when value site array is allocated |
100 | statically at compile time. */ |
101 | hasStaticCounters = 0; |
102 | /* When dynamic allocation is enabled, allow tracking the max number of |
103 | * values allowd. */ |
104 | if (!hasNonDefaultValsPerSite) |
105 | VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE; |
106 | |
107 | for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI) |
108 | NumVSites += Data->NumValueSites[VKI]; |
109 | |
110 | // If NumVSites = 0, calloc is allowed to return a non-null pointer. |
111 | assert(NumVSites > 0 && "NumVSites can't be zero" ); |
112 | ValueProfNode **Mem = |
113 | (ValueProfNode **)calloc(nmemb: NumVSites, size: sizeof(ValueProfNode *)); |
114 | if (!Mem) |
115 | return 0; |
116 | if (!COMPILER_RT_BOOL_CMPXCHG(&Data->Values, 0, Mem)) { |
117 | free(ptr: Mem); |
118 | return 0; |
119 | } |
120 | return 1; |
121 | } |
122 | |
123 | static ValueProfNode *allocateOneNode(void) { |
124 | ValueProfNode *Node; |
125 | |
126 | if (!hasStaticCounters) |
127 | return (ValueProfNode *)calloc(nmemb: 1, size: sizeof(ValueProfNode)); |
128 | |
129 | /* Early check to avoid value wrapping around. */ |
130 | if (CurrentVNode + 1 > EndVNode) { |
131 | if (OutOfNodesWarnings++ < INSTR_PROF_MAX_VP_WARNS) { |
132 | PROF_WARN("Unable to track new values: %s. " |
133 | " Consider using option -mllvm -vp-counters-per-site=<n> to " |
134 | "allocate more" |
135 | " value profile counters at compile time. \n" , |
136 | "Running out of static counters" ); |
137 | } |
138 | return 0; |
139 | } |
140 | Node = COMPILER_RT_PTR_FETCH_ADD(ValueProfNode, CurrentVNode, 1); |
141 | /* Due to section padding, EndVNode point to a byte which is one pass |
142 | * an incomplete VNode, so we need to skip the last incomplete node. */ |
143 | if (Node + 1 > EndVNode) |
144 | return 0; |
145 | |
146 | return Node; |
147 | } |
148 | |
149 | static COMPILER_RT_ALWAYS_INLINE void |
150 | instrumentTargetValueImpl(uint64_t TargetValue, void *Data, |
151 | uint32_t CounterIndex, uint64_t CountValue) { |
152 | __llvm_profile_data *PData = (__llvm_profile_data *)Data; |
153 | if (!PData) |
154 | return; |
155 | if (!CountValue) |
156 | return; |
157 | if (!PData->Values) { |
158 | if (!allocateValueProfileCounters(Data: PData)) |
159 | return; |
160 | } |
161 | |
162 | ValueProfNode **ValueCounters = (ValueProfNode **)PData->Values; |
163 | ValueProfNode *PrevVNode = NULL; |
164 | ValueProfNode *MinCountVNode = NULL; |
165 | ValueProfNode *CurVNode = ValueCounters[CounterIndex]; |
166 | uint64_t MinCount = UINT64_MAX; |
167 | |
168 | uint8_t VDataCount = 0; |
169 | while (CurVNode) { |
170 | if (TargetValue == CurVNode->Value) { |
171 | CurVNode->Count += CountValue; |
172 | return; |
173 | } |
174 | if (CurVNode->Count < MinCount) { |
175 | MinCount = CurVNode->Count; |
176 | MinCountVNode = CurVNode; |
177 | } |
178 | PrevVNode = CurVNode; |
179 | CurVNode = CurVNode->Next; |
180 | ++VDataCount; |
181 | } |
182 | |
183 | if (VDataCount >= VPMaxNumValsPerSite) { |
184 | /* Bump down the min count node's count. If it reaches 0, |
185 | * evict it. This eviction/replacement policy makes hot |
186 | * targets more sticky while cold targets less so. In other |
187 | * words, it makes it less likely for the hot targets to be |
188 | * prematurally evicted during warmup/establishment period, |
189 | * when their counts are still low. In a special case when |
190 | * the number of values tracked is reduced to only one, this |
191 | * policy will guarantee that the dominating target with >50% |
192 | * total count will survive in the end. Note that this scheme |
193 | * allows the runtime to track the min count node in an adaptive |
194 | * manner. It can correct previous mistakes and eventually |
195 | * lock on a cold target that is alread in stable state. |
196 | * |
197 | * In very rare cases, this replacement scheme may still lead |
198 | * to target loss. For instance, out of \c N value slots, \c N-1 |
199 | * slots are occupied by luke warm targets during the warmup |
200 | * period and the remaining one slot is competed by two or more |
201 | * very hot targets. If those hot targets occur in an interleaved |
202 | * way, none of them will survive (gain enough weight to throw out |
203 | * other established entries) due to the ping-pong effect. |
204 | * To handle this situation, user can choose to increase the max |
205 | * number of tracked values per value site. Alternatively, a more |
206 | * expensive eviction mechanism can be implemented. It requires |
207 | * the runtime to track the total number of evictions per-site. |
208 | * When the total number of evictions reaches certain threshold, |
209 | * the runtime can wipe out more than one lowest count entries |
210 | * to give space for hot targets. |
211 | */ |
212 | if (MinCountVNode->Count <= CountValue) { |
213 | CurVNode = MinCountVNode; |
214 | CurVNode->Value = TargetValue; |
215 | CurVNode->Count = CountValue; |
216 | } else |
217 | MinCountVNode->Count -= CountValue; |
218 | |
219 | return; |
220 | } |
221 | |
222 | CurVNode = allocateOneNode(); |
223 | if (!CurVNode) |
224 | return; |
225 | CurVNode->Value = TargetValue; |
226 | CurVNode->Count += CountValue; |
227 | |
228 | uint32_t Success = 0; |
229 | if (!ValueCounters[CounterIndex]) |
230 | Success = |
231 | COMPILER_RT_BOOL_CMPXCHG(&ValueCounters[CounterIndex], 0, CurVNode); |
232 | else if (PrevVNode && !PrevVNode->Next) |
233 | Success = COMPILER_RT_BOOL_CMPXCHG(&(PrevVNode->Next), 0, CurVNode); |
234 | |
235 | if (!Success && !hasStaticCounters) { |
236 | free(ptr: CurVNode); |
237 | return; |
238 | } |
239 | } |
240 | |
241 | COMPILER_RT_VISIBILITY void |
242 | __llvm_profile_instrument_target(uint64_t TargetValue, void *Data, |
243 | uint32_t CounterIndex) { |
244 | instrumentTargetValueImpl(TargetValue, Data, CounterIndex, CountValue: 1); |
245 | } |
246 | COMPILER_RT_VISIBILITY void |
247 | __llvm_profile_instrument_target_value(uint64_t TargetValue, void *Data, |
248 | uint32_t CounterIndex, |
249 | uint64_t CountValue) { |
250 | instrumentTargetValueImpl(TargetValue, Data, CounterIndex, CountValue); |
251 | } |
252 | |
253 | /* |
254 | * The target values are partitioned into multiple ranges. The range spec is |
255 | * defined in InstrProfData.inc. |
256 | */ |
257 | COMPILER_RT_VISIBILITY void |
258 | __llvm_profile_instrument_memop(uint64_t TargetValue, void *Data, |
259 | uint32_t CounterIndex) { |
260 | // Map the target value to the representative value of its range. |
261 | uint64_t RepValue = InstrProfGetRangeRepValue(Value: TargetValue); |
262 | __llvm_profile_instrument_target(TargetValue: RepValue, Data, CounterIndex); |
263 | } |
264 | |
265 | /* |
266 | * A wrapper struct that represents value profile runtime data. |
267 | * Like InstrProfRecord class which is used by profiling host tools, |
268 | * ValueProfRuntimeRecord also implements the abstract interfaces defined in |
269 | * ValueProfRecordClosure so that the runtime data can be serialized using |
270 | * shared C implementation. |
271 | */ |
272 | typedef struct ValueProfRuntimeRecord { |
273 | const __llvm_profile_data *Data; |
274 | ValueProfNode **NodesKind[IPVK_Last + 1]; |
275 | uint8_t **SiteCountArray; |
276 | } ValueProfRuntimeRecord; |
277 | |
278 | /* ValueProfRecordClosure Interface implementation. */ |
279 | |
280 | static uint32_t getNumValueSitesRT(const void *R, uint32_t VK) { |
281 | return ((const ValueProfRuntimeRecord *)R)->Data->NumValueSites[VK]; |
282 | } |
283 | |
284 | static uint32_t getNumValueDataRT(const void *R, uint32_t VK) { |
285 | uint32_t S = 0, I; |
286 | const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R; |
287 | if (Record->SiteCountArray[VK] == INSTR_PROF_NULLPTR) |
288 | return 0; |
289 | for (I = 0; I < Record->Data->NumValueSites[VK]; I++) |
290 | S += Record->SiteCountArray[VK][I]; |
291 | return S; |
292 | } |
293 | |
294 | static uint32_t getNumValueDataForSiteRT(const void *R, uint32_t VK, |
295 | uint32_t S) { |
296 | const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R; |
297 | return Record->SiteCountArray[VK][S]; |
298 | } |
299 | |
300 | static ValueProfRuntimeRecord RTRecord; |
301 | static ValueProfRecordClosure RTRecordClosure = { |
302 | &RTRecord, INSTR_PROF_NULLPTR, /* GetNumValueKinds */ |
303 | getNumValueSitesRT, getNumValueDataRT, getNumValueDataForSiteRT, |
304 | INSTR_PROF_NULLPTR, /* RemapValueData */ |
305 | INSTR_PROF_NULLPTR, /* GetValueForSite, */ |
306 | INSTR_PROF_NULLPTR /* AllocValueProfData */ |
307 | }; |
308 | |
309 | static uint32_t |
310 | initializeValueProfRuntimeRecord(const __llvm_profile_data *Data, |
311 | uint8_t *SiteCountArray[]) { |
312 | unsigned I, J, S = 0, NumValueKinds = 0; |
313 | ValueProfNode **Nodes = (ValueProfNode **)Data->Values; |
314 | RTRecord.Data = Data; |
315 | RTRecord.SiteCountArray = SiteCountArray; |
316 | for (I = 0; I <= IPVK_Last; I++) { |
317 | uint16_t N = Data->NumValueSites[I]; |
318 | if (!N) |
319 | continue; |
320 | |
321 | NumValueKinds++; |
322 | |
323 | RTRecord.NodesKind[I] = Nodes ? &Nodes[S] : INSTR_PROF_NULLPTR; |
324 | for (J = 0; J < N; J++) { |
325 | /* Compute value count for each site. */ |
326 | uint32_t C = 0; |
327 | ValueProfNode *Site = |
328 | Nodes ? RTRecord.NodesKind[I][J] : INSTR_PROF_NULLPTR; |
329 | while (Site) { |
330 | C++; |
331 | Site = Site->Next; |
332 | } |
333 | if (C > UCHAR_MAX) |
334 | C = UCHAR_MAX; |
335 | RTRecord.SiteCountArray[I][J] = C; |
336 | } |
337 | S += N; |
338 | } |
339 | return NumValueKinds; |
340 | } |
341 | |
342 | static ValueProfNode *getNextNValueData(uint32_t VK, uint32_t Site, |
343 | InstrProfValueData *Dst, |
344 | ValueProfNode *StartNode, uint32_t N) { |
345 | unsigned I; |
346 | ValueProfNode *VNode = StartNode ? StartNode : RTRecord.NodesKind[VK][Site]; |
347 | for (I = 0; I < N; I++) { |
348 | Dst[I].Value = VNode->Value; |
349 | Dst[I].Count = VNode->Count; |
350 | VNode = VNode->Next; |
351 | } |
352 | return VNode; |
353 | } |
354 | |
355 | static uint32_t getValueProfDataSizeWrapper(void) { |
356 | return getValueProfDataSize(Closure: &RTRecordClosure); |
357 | } |
358 | |
359 | static uint32_t getNumValueDataForSiteWrapper(uint32_t VK, uint32_t S) { |
360 | return getNumValueDataForSiteRT(R: &RTRecord, VK, S); |
361 | } |
362 | |
363 | static VPDataReaderType TheVPDataReader = { |
364 | initializeValueProfRuntimeRecord, getValueProfRecordHeaderSize, |
365 | getFirstValueProfRecord, getNumValueDataForSiteWrapper, |
366 | getValueProfDataSizeWrapper, getNextNValueData}; |
367 | |
368 | COMPILER_RT_VISIBILITY VPDataReaderType *lprofGetVPDataReader(void) { |
369 | return &TheVPDataReader; |
370 | } |
371 | |