1/*
2 * ompt-tsan.cpp -- Archer runtime library, TSan annotations for Archer
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for details.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef __STDC_FORMAT_MACROS
14#define __STDC_FORMAT_MACROS
15#endif
16
17#include <algorithm>
18#include <atomic>
19#include <cassert>
20#include <cstdlib>
21#include <cstring>
22#include <dlfcn.h>
23#include <inttypes.h>
24#include <iostream>
25#include <list>
26#include <mutex>
27#include <sstream>
28#include <string>
29#include <sys/resource.h>
30#include <unistd.h>
31#include <unordered_map>
32#include <vector>
33
34#include "omp-tools.h"
35
36// Define attribute that indicates that the fall through from the previous
37// case label is intentional and should not be diagnosed by a compiler
38// Code from libcxx/include/__config
39// Use a function like macro to imply that it must be followed by a semicolon
40#if __cplusplus > 201402L && __has_cpp_attribute(fallthrough)
41#define KMP_FALLTHROUGH() [[fallthrough]]
42// icc cannot properly tell this attribute is absent so force off
43#elif defined(__INTEL_COMPILER)
44#define KMP_FALLTHROUGH() ((void)0)
45#elif __has_cpp_attribute(clang::fallthrough)
46#define KMP_FALLTHROUGH() [[clang::fallthrough]]
47#elif __has_attribute(fallthrough) || __GNUC__ >= 7
48#define KMP_FALLTHROUGH() __attribute__((__fallthrough__))
49#else
50#define KMP_FALLTHROUGH() ((void)0)
51#endif
52
53static int hasReductionCallback;
54
55namespace {
// Tool configuration parsed from the ARCHER_OPTIONS environment variable.
// Options have the form "name=value" and are separated by single spaces.
class ArcherFlags {
public:
#if (LLVM_VERSION) >= 40
  int flush_shadow{0};
#endif
  int print_max_rss{0};
  int verbose{0};
  int enabled{1};
  int report_data_leak{0};
  int ignore_serial{0};
  std::atomic<int> all_memory{0};

  // Parse `env` (may be null). Unknown options are reported to stderr.
  // Fix: the diagnostic now prints the offending option; previously it
  // printed the tokenizer's loop variable, which after tokenization no
  // longer held the current token.
  ArcherFlags(const char *env) {
    if (!env)
      return;
    std::istringstream Input{std::string(env)};
    std::string Option;
    int tmp_int;
    while (std::getline(Input, Option, ' ')) {
#if (LLVM_VERSION) >= 40
      if (sscanf(Option.c_str(), "flush_shadow=%d", &flush_shadow))
        continue;
#endif
      if (sscanf(Option.c_str(), "print_max_rss=%d", &print_max_rss))
        continue;
      if (sscanf(Option.c_str(), "verbose=%d", &verbose))
        continue;
      if (sscanf(Option.c_str(), "report_data_leak=%d", &report_data_leak))
        continue;
      if (sscanf(Option.c_str(), "enable=%d", &enabled))
        continue;
      if (sscanf(Option.c_str(), "ignore_serial=%d", &ignore_serial))
        continue;
      if (sscanf(Option.c_str(), "all_memory=%d", &tmp_int)) {
        all_memory = tmp_int;
        continue;
      }
      std::cerr << "Illegal values for ARCHER_OPTIONS variable: " << Option
                << std::endl;
    }
  }
};
104
// Subset of TSAN_OPTIONS relevant to this tool. Only
// ignore_noninstrumented_modules is extracted (used to print a warning).
class TsanFlags {
public:
  int ignore_noninstrumented_modules;

  TsanFlags(const char *env) : ignore_noninstrumented_modules(0) {
    if (!env)
      return;
    // TSan separates its flags by spaces, commas, colons or any whitespace.
    const std::string Str(env);
    const char *Separators = " ,:\n\t\r";
    size_t Pos = 0;
    while (Pos <= Str.size()) {
      size_t End = Str.find_first_of(Separators, Pos);
      if (End == std::string::npos)
        End = Str.size();
      const std::string Token = Str.substr(Pos, End - Pos);
      // Non-matching tokens leave the value untouched; the last matching
      // token wins, mirroring TSan's own parsing.
      sscanf(Token.c_str(), "ignore_noninstrumented_modules=%d",
             &ignore_noninstrumented_modules);
      Pos = End + 1;
    }
  }
};
138} // namespace
139
140#if (LLVM_VERSION) >= 40
141extern "C" {
142int __attribute__((weak)) __archer_get_omp_status();
143void __attribute__((weak)) __tsan_flush_memory() {}
144}
145#endif
146static ArcherFlags *archer_flags;
147
148#ifndef TsanHappensBefore
149
// No-op fallback matching any signature; used below to initialize the TSan
// annotation function pointers so they are always safe to call, even when
// the real annotation symbols are not resolved at runtime.
template <typename... Args> static void __ompt_tsan_func(Args...) {}
151
152#define DECLARE_TSAN_FUNCTION(name, ...) \
153 static void (*name)(__VA_ARGS__) = __ompt_tsan_func<__VA_ARGS__>;
154
155// Thread Sanitizer is a tool that finds races in code.
156// See http://code.google.com/p/data-race-test/wiki/DynamicAnnotations .
157// tsan detects these exact functions by name.
158extern "C" {
159DECLARE_TSAN_FUNCTION(AnnotateHappensAfter, const char *, int,
160 const volatile void *)
161DECLARE_TSAN_FUNCTION(AnnotateHappensBefore, const char *, int,
162 const volatile void *)
163DECLARE_TSAN_FUNCTION(AnnotateIgnoreWritesBegin, const char *, int)
164DECLARE_TSAN_FUNCTION(AnnotateIgnoreWritesEnd, const char *, int)
165DECLARE_TSAN_FUNCTION(AnnotateNewMemory, const char *, int,
166 const volatile void *, size_t)
167DECLARE_TSAN_FUNCTION(__tsan_func_entry, const void *)
168DECLARE_TSAN_FUNCTION(__tsan_func_exit)
169}
170
171// This marker is used to define a happens-before arc. The race detector will
172// infer an arc from the begin to the end when they share the same pointer
173// argument.
174#define TsanHappensBefore(cv) AnnotateHappensBefore(__FILE__, __LINE__, cv)
175
176// This marker defines the destination of a happens-before arc.
177#define TsanHappensAfter(cv) AnnotateHappensAfter(__FILE__, __LINE__, cv)
178
179// Ignore any races on writes between here and the next TsanIgnoreWritesEnd.
180#define TsanIgnoreWritesBegin() AnnotateIgnoreWritesBegin(__FILE__, __LINE__)
181
182// Resume checking for racy writes.
183#define TsanIgnoreWritesEnd() AnnotateIgnoreWritesEnd(__FILE__, __LINE__)
184
185// We don't really delete the clock for now
186#define TsanDeleteClock(cv)
187
188// newMemory
189#define TsanNewMemory(addr, size) \
190 AnnotateNewMemory(__FILE__, __LINE__, addr, size)
191#define TsanFreeMemory(addr, size) \
192 AnnotateNewMemory(__FILE__, __LINE__, addr, size)
193#endif
194
195// Function entry/exit
196#define TsanFuncEntry(pc) __tsan_func_entry(pc)
197#define TsanFuncExit() __tsan_func_exit()
198
199/// Required OMPT inquiry functions.
200static ompt_get_parallel_info_t ompt_get_parallel_info;
201static ompt_get_thread_data_t ompt_get_thread_data;
202
203typedef char ompt_tsan_clockid;
204
// Hand out process-wide unique, monotonically increasing ids (starting at 0)
// for OMPT thread data; safe to call from multiple threads concurrently.
static uint64_t my_next_id() {
  static uint64_t NextId = 0;
  return __sync_fetch_and_add(&NextId, 1);
}
210
211static int pagesize{0};
212
213// Data structure to provide a threadsafe pool of reusable objects.
214// DataPool<Type of objects>
215namespace {
216template <typename T> struct DataPool final {
217 static __thread DataPool<T> *ThreadDataPool;
218 std::mutex DPMutex{};
219
220 // store unused objects
221 std::vector<T *> DataPointer{};
222 std::vector<T *> RemoteDataPointer{};
223
224 // store all allocated memory to finally release
225 std::list<void *> memory;
226
227 // count remotely returned data (RemoteDataPointer.size())
228 std::atomic<int> remote{0};
229
230 // totally allocated data objects in pool
231 int total{0};
232#ifdef DEBUG_DATA
233 int remoteReturn{0};
234 int localReturn{0};
235
236 int getRemote() { return remoteReturn + remote; }
237 int getLocal() { return localReturn; }
238#endif
239 int getTotal() { return total; }
240 int getMissing() {
241 return total - DataPointer.size() - RemoteDataPointer.size();
242 }
243
244 // fill the pool by allocating a page of memory
245 void newDatas() {
246 if (remote > 0) {
247 const std::lock_guard<std::mutex> lock(DPMutex);
248 // DataPointer is empty, so just swap the vectors
249 DataPointer.swap(RemoteDataPointer);
250 remote = 0;
251 return;
252 }
253 // calculate size of an object including padding to cacheline size
254 size_t elemSize = sizeof(T);
255 size_t paddedSize = (((elemSize - 1) / 64) + 1) * 64;
256 // number of padded elements to allocate
257 int ndatas = pagesize / paddedSize;
258 char *datas = (char *)malloc(size: ndatas * paddedSize);
259 memory.push_back(x: datas);
260 for (int i = 0; i < ndatas; i++) {
261 DataPointer.push_back(new (datas + i * paddedSize) T(this));
262 }
263 total += ndatas;
264 }
265
266 // get data from the pool
267 T *getData() {
268 T *ret;
269 if (DataPointer.empty())
270 newDatas();
271 ret = DataPointer.back();
272 DataPointer.pop_back();
273 return ret;
274 }
275
276 // accesses to the thread-local datapool don't need locks
277 void returnOwnData(T *data) {
278 DataPointer.emplace_back(data);
279#ifdef DEBUG_DATA
280 localReturn++;
281#endif
282 }
283
284 // returning to a remote datapool using lock
285 void returnData(T *data) {
286 const std::lock_guard<std::mutex> lock(DPMutex);
287 RemoteDataPointer.emplace_back(data);
288 remote++;
289#ifdef DEBUG_DATA
290 remoteReturn++;
291#endif
292 }
293
294 ~DataPool() {
295 // we assume all memory is returned when the thread finished / destructor is
296 // called
297 if (archer_flags->report_data_leak && getMissing() != 0) {
298 printf("ERROR: While freeing DataPool (%s) we are missing %i data "
299 "objects.\n",
300 __PRETTY_FUNCTION__, getMissing());
301 exit(status: -3);
302 }
303 for (auto i : DataPointer)
304 if (i)
305 i->~T();
306 for (auto i : RemoteDataPointer)
307 if (i)
308 i->~T();
309 for (auto i : memory)
310 if (i)
311 free(ptr: i);
312 }
313};
314
315template <typename T> struct DataPoolEntry {
316 DataPool<T> *owner;
317
318 static T *New() { return DataPool<T>::ThreadDataPool->getData(); }
319
320 void Delete() {
321 static_cast<T *>(this)->Reset();
322 if (owner == DataPool<T>::ThreadDataPool)
323 owner->returnOwnData(static_cast<T *>(this));
324 else
325 owner->returnData(static_cast<T *>(this));
326 }
327
328 DataPoolEntry(DataPool<T> *dp) : owner(dp) {}
329};
330
331struct DependencyData;
332typedef DataPool<DependencyData> DependencyDataPool;
333template <>
334__thread DependencyDataPool *DependencyDataPool::ThreadDataPool = nullptr;
335
/// Data structure to store additional information for task dependency.
/// Holds one clock byte per dependence kind; only the *addresses* of these
/// members are used as TSan synchronization points, never their values.
struct DependencyData final : DataPoolEntry<DependencyData> {
  ompt_tsan_clockid in;
  ompt_tsan_clockid out;
  ompt_tsan_clockid inoutset;
  void *GetInPtr() { return &in; }
  void *GetOutPtr() { return &out; }
  void *GetInoutsetPtr() { return &inoutset; }

  // Nothing to reset: only member addresses matter, not their contents.
  void Reset() {}

  static DependencyData *New() { return DataPoolEntry<DependencyData>::New(); }

  DependencyData(DataPool<DependencyData> *dp)
      : DataPoolEntry<DependencyData>(dp) {}
};
352
353struct TaskDependency {
354 void *inPtr;
355 void *outPtr;
356 void *inoutsetPtr;
357 ompt_dependence_type_t type;
358 TaskDependency(DependencyData *depData, ompt_dependence_type_t type)
359 : inPtr(depData->GetInPtr()), outPtr(depData->GetOutPtr()),
360 inoutsetPtr(depData->GetInoutsetPtr()), type(type) {}
361 void AnnotateBegin() {
362 if (type == ompt_dependence_type_out ||
363 type == ompt_dependence_type_inout ||
364 type == ompt_dependence_type_mutexinoutset) {
365 TsanHappensAfter(inPtr);
366 TsanHappensAfter(outPtr);
367 TsanHappensAfter(inoutsetPtr);
368 } else if (type == ompt_dependence_type_in) {
369 TsanHappensAfter(outPtr);
370 TsanHappensAfter(inoutsetPtr);
371 } else if (type == ompt_dependence_type_inoutset) {
372 TsanHappensAfter(inPtr);
373 TsanHappensAfter(outPtr);
374 }
375 }
376 void AnnotateEnd() {
377 if (type == ompt_dependence_type_out ||
378 type == ompt_dependence_type_inout ||
379 type == ompt_dependence_type_mutexinoutset) {
380 TsanHappensBefore(outPtr);
381 } else if (type == ompt_dependence_type_in) {
382 TsanHappensBefore(inPtr);
383 } else if (type == ompt_dependence_type_inoutset) {
384 TsanHappensBefore(inoutsetPtr);
385 }
386 }
387};
388
389struct ParallelData;
390typedef DataPool<ParallelData> ParallelDataPool;
391template <>
392__thread ParallelDataPool *ParallelDataPool::ThreadDataPool = nullptr;
393
/// Data structure to store additional information for parallel regions.
/// Pooled; one instance per active parallel region.
struct ParallelData final : DataPoolEntry<ParallelData> {

  // Parallel fork is just another barrier, use Barrier[1]

  /// Two addresses for relationships with barriers. Only the addresses are
  /// used (as TSan synchronization points), never the values.
  ompt_tsan_clockid Barrier[2];

  /// Code pointer of the parallel region (may be nullptr for the
  /// implicitly created region of an initial task).
  const void *codePtr;

  /// Clock address for the fork of the region (shared with Barrier[1]).
  void *GetParallelPtr() { return &(Barrier[1]); }

  /// Clock address for the barrier with the given index (0 or 1).
  void *GetBarrierPtr(unsigned Index) { return &(Barrier[Index]); }

  ParallelData *Init(const void *codeptr) {
    codePtr = codeptr;
    return this;
  }

  // Nothing to reset: only member addresses matter.
  void Reset() {}

  static ParallelData *New(const void *codeptr) {
    return DataPoolEntry<ParallelData>::New()->Init(codeptr);
  }

  ParallelData(DataPool<ParallelData> *dp) : DataPoolEntry<ParallelData>(dp) {}
};
421
422static inline ParallelData *ToParallelData(ompt_data_t *parallel_data) {
423 return reinterpret_cast<ParallelData *>(parallel_data->ptr);
424}
425
426struct Taskgroup;
427typedef DataPool<Taskgroup> TaskgroupPool;
428template <> __thread TaskgroupPool *TaskgroupPool::ThreadDataPool = nullptr;
429
/// Data structure to support stacking of taskgroups and allow synchronization.
/// Taskgroups nest; the stack is formed by the Parent links.
struct Taskgroup final : DataPoolEntry<Taskgroup> {
  /// Its address is used for relationships of the taskgroup's task set.
  ompt_tsan_clockid Ptr;

  /// Reference to the parent taskgroup (nullptr for the outermost one).
  Taskgroup *Parent;

  void *GetPtr() { return &Ptr; }

  Taskgroup *Init(Taskgroup *parent) {
    Parent = parent;
    return this;
  }

  // Nothing to reset: only the address of Ptr matters.
  void Reset() {}

  static Taskgroup *New(Taskgroup *Parent) {
    return DataPoolEntry<Taskgroup>::New()->Init(Parent);
  }

  Taskgroup(DataPool<Taskgroup> *dp) : DataPoolEntry<Taskgroup>(dp) {}
};
453
454enum ArcherTaskFlag { ArcherTaskFulfilled = 0x00010000 };
455
456struct TaskData;
457typedef DataPool<TaskData> TaskDataPool;
458template <> __thread TaskDataPool *TaskDataPool::ThreadDataPool = nullptr;
459
460/// Data structure to store additional information for tasks.
461struct TaskData final : DataPoolEntry<TaskData> {
462 /// Its address is used for relationships of this task.
463 ompt_tsan_clockid Task{0};
464
465 /// Child tasks use its address to declare a relationship to a taskwait in
466 /// this task.
467 ompt_tsan_clockid Taskwait{0};
468
469 /// Child tasks use its address to model omp_all_memory dependencies
470 ompt_tsan_clockid AllMemory[2]{0};
471
472 /// Index of which barrier to use next.
473 char BarrierIndex{0};
474
475 /// Whether this task is currently executing a barrier.
476 bool InBarrier{false};
477
478 /// Whether this task is an included task.
479 int TaskType{0};
480
481 /// count execution phase
482 int execution{0};
483
484 /// Count how often this structure has been put into child tasks + 1.
485 std::atomic_int RefCount{1};
486
487 /// Reference to the parent that created this task.
488 TaskData *Parent{nullptr};
489
490 /// Reference to the team of this task.
491 ParallelData *Team{nullptr};
492
493 /// Reference to the current taskgroup that this task either belongs to or
494 /// that it just created.
495 Taskgroup *TaskGroup{nullptr};
496
497 /// Dependency information for this task.
498 TaskDependency *Dependencies{nullptr};
499
500 /// Number of dependency entries.
501 unsigned DependencyCount{0};
502
503 // The dependency-map stores DependencyData objects representing
504 // the dependency variables used on the sibling tasks created from
505 // this task
506 // We expect a rare need for the dependency-map, so alloc on demand
507 std::unordered_map<void *, DependencyData *> *DependencyMap{nullptr};
508
509#ifdef DEBUG
510 int freed{0};
511#endif
512
513 bool isIncluded() { return TaskType & ompt_task_undeferred; }
514 bool isUntied() { return TaskType & ompt_task_untied; }
515 bool isFinal() { return TaskType & ompt_task_final; }
516 bool isMergable() { return TaskType & ompt_task_mergeable; }
517 bool isMerged() { return TaskType & ompt_task_merged; }
518
519 bool isExplicit() { return TaskType & ompt_task_explicit; }
520 bool isImplicit() { return TaskType & ompt_task_implicit; }
521 bool isInitial() { return TaskType & ompt_task_initial; }
522 bool isTarget() { return TaskType & ompt_task_target; }
523
524 bool isFulfilled() { return TaskType & ArcherTaskFulfilled; }
525 void setFulfilled() { TaskType |= ArcherTaskFulfilled; }
526
527 void setAllMemoryDep() { AllMemory[0] = 1; }
528 bool hasAllMemoryDep() { return AllMemory[0]; }
529
530 void *GetTaskPtr() { return &Task; }
531
532 void *GetTaskwaitPtr() { return &Taskwait; }
533
534 void *GetLastAllMemoryPtr() { return AllMemory; }
535 void *GetNextAllMemoryPtr() { return AllMemory + 1; }
536
537 TaskData *Init(TaskData *parent, int taskType) {
538 TaskType = taskType;
539 Parent = parent;
540 Team = Parent->Team;
541 BarrierIndex = Parent->BarrierIndex;
542 if (Parent != nullptr) {
543 Parent->RefCount++;
544 // Copy over pointer to taskgroup. This task may set up its own stack
545 // but for now belongs to its parent's taskgroup.
546 TaskGroup = Parent->TaskGroup;
547 }
548 return this;
549 }
550
551 TaskData *Init(ParallelData *team, int taskType) {
552 TaskType = taskType;
553 execution = 1;
554 Team = team;
555 return this;
556 }
557
558 void Reset() {
559 InBarrier = false;
560 TaskType = 0;
561 execution = 0;
562 BarrierIndex = 0;
563 RefCount = 1;
564 Parent = nullptr;
565 Team = nullptr;
566 TaskGroup = nullptr;
567 if (DependencyMap) {
568 for (auto i : *DependencyMap)
569 i.second->Delete();
570 delete DependencyMap;
571 }
572 DependencyMap = nullptr;
573 if (Dependencies)
574 free(ptr: Dependencies);
575 Dependencies = nullptr;
576 DependencyCount = 0;
577#ifdef DEBUG
578 freed = 0;
579#endif
580 }
581
582 static TaskData *New(TaskData *parent, int taskType) {
583 return DataPoolEntry<TaskData>::New()->Init(parent, taskType);
584 }
585
586 static TaskData *New(ParallelData *team, int taskType) {
587 return DataPoolEntry<TaskData>::New()->Init(team, taskType);
588 }
589
590 TaskData(DataPool<TaskData> *dp) : DataPoolEntry<TaskData>(dp) {}
591};
592} // namespace
593
594static inline TaskData *ToTaskData(ompt_data_t *task_data) {
595 if (task_data)
596 return reinterpret_cast<TaskData *>(task_data->ptr);
597 return nullptr;
598}
599
600/// Store a mutex for each wait_id to resolve race condition with callbacks.
601static std::unordered_map<ompt_wait_id_t, std::mutex> Locks;
602static std::mutex LocksMutex;
603
604static void ompt_tsan_thread_begin(ompt_thread_t thread_type,
605 ompt_data_t *thread_data) {
606 ParallelDataPool::ThreadDataPool = new ParallelDataPool;
607 TsanNewMemory(ParallelDataPool::ThreadDataPool,
608 sizeof(ParallelDataPool::ThreadDataPool));
609 TaskgroupPool::ThreadDataPool = new TaskgroupPool;
610 TsanNewMemory(TaskgroupPool::ThreadDataPool,
611 sizeof(TaskgroupPool::ThreadDataPool));
612 TaskDataPool::ThreadDataPool = new TaskDataPool;
613 TsanNewMemory(TaskDataPool::ThreadDataPool,
614 sizeof(TaskDataPool::ThreadDataPool));
615 DependencyDataPool::ThreadDataPool = new DependencyDataPool;
616 TsanNewMemory(DependencyDataPool::ThreadDataPool,
617 sizeof(DependencyDataPool::ThreadDataPool));
618 thread_data->value = my_next_id();
619}
620
// Thread end: destroy this thread's object pools. TSan is told to ignore the
// writes performed during the teardown to avoid spurious reports caused by
// the pool cleanup itself.
static void ompt_tsan_thread_end(ompt_data_t *thread_data) {
  TsanIgnoreWritesBegin();
  delete ParallelDataPool::ThreadDataPool;
  delete TaskgroupPool::ThreadDataPool;
  delete TaskDataPool::ThreadDataPool;
  delete DependencyDataPool::ThreadDataPool;
  TsanIgnoreWritesEnd();
}
629
630/// OMPT event callbacks for handling parallel regions.
631
632static void ompt_tsan_parallel_begin(ompt_data_t *parent_task_data,
633 const ompt_frame_t *parent_task_frame,
634 ompt_data_t *parallel_data,
635 uint32_t requested_team_size, int flag,
636 const void *codeptr_ra) {
637 ParallelData *Data = ParallelData::New(codeptr: codeptr_ra);
638 parallel_data->ptr = Data;
639
640 TsanHappensBefore(Data->GetParallelPtr());
641 if (archer_flags->ignore_serial && ToTaskData(task_data: parent_task_data)->isInitial())
642 TsanIgnoreWritesEnd();
643}
644
645static void ompt_tsan_parallel_end(ompt_data_t *parallel_data,
646 ompt_data_t *task_data, int flag,
647 const void *codeptr_ra) {
648 if (archer_flags->ignore_serial && ToTaskData(task_data)->isInitial())
649 TsanIgnoreWritesBegin();
650 ParallelData *Data = ToParallelData(parallel_data);
651 TsanHappensAfter(Data->GetBarrierPtr(0));
652 TsanHappensAfter(Data->GetBarrierPtr(1));
653
654 Data->Delete();
655
656#if (LLVM_VERSION >= 40)
657 if (&__archer_get_omp_status) {
658 if (__archer_get_omp_status() == 0 && archer_flags->flush_shadow)
659 __tsan_flush_memory();
660 }
661#endif
662}
663
664static void ompt_tsan_implicit_task(ompt_scope_endpoint_t endpoint,
665 ompt_data_t *parallel_data,
666 ompt_data_t *task_data,
667 unsigned int team_size,
668 unsigned int thread_num, int type) {
669 switch (endpoint) {
670 case ompt_scope_begin:
671 if (type & ompt_task_initial) {
672 parallel_data->ptr = ParallelData::New(codeptr: nullptr);
673 }
674 task_data->ptr = TaskData::New(team: ToParallelData(parallel_data), taskType: type);
675 TsanHappensAfter(ToParallelData(parallel_data)->GetParallelPtr());
676 TsanFuncEntry(ToParallelData(parallel_data)->codePtr);
677 break;
678 case ompt_scope_end: {
679 TaskData *Data = ToTaskData(task_data);
680#ifdef DEBUG
681 assert(Data->freed == 0 && "Implicit task end should only be called once!");
682 Data->freed = 1;
683#endif
684 assert(Data->RefCount == 1 &&
685 "All tasks should have finished at the implicit barrier!");
686 if (type & ompt_task_initial) {
687 Data->Team->Delete();
688 }
689 Data->Delete();
690 TsanFuncExit();
691 break;
692 }
693 case ompt_scope_beginend:
694 // Should not occur according to OpenMP 5.1
695 // Tested in OMPT tests
696 break;
697 }
698}
699
/// Barrier / taskwait / taskgroup begin and end events.
///
/// A combined ompt_scope_beginend event executes the begin handling and then
/// falls through into the end handling via KMP_FALLTHROUGH; a plain begin
/// breaks out before the fallthrough.
static void ompt_tsan_sync_region(ompt_sync_region_t kind,
                                  ompt_scope_endpoint_t endpoint,
                                  ompt_data_t *parallel_data,
                                  ompt_data_t *task_data,
                                  const void *codeptr_ra) {
  TaskData *Data = ToTaskData(task_data);
  switch (endpoint) {
  case ompt_scope_begin:
  case ompt_scope_beginend:
    TsanFuncEntry(codeptr_ra);
    switch (kind) {
    case ompt_sync_region_barrier_implementation:
    case ompt_sync_region_barrier_implicit:
    case ompt_sync_region_barrier_explicit:
    case ompt_sync_region_barrier_implicit_parallel:
    case ompt_sync_region_barrier_implicit_workshare:
    case ompt_sync_region_barrier_teams:
    case ompt_sync_region_barrier: {
      // Entering the barrier: this task's work so far happens before the
      // team's current barrier clock.
      char BarrierIndex = Data->BarrierIndex;
      TsanHappensBefore(Data->Team->GetBarrierPtr(BarrierIndex));

      if (hasReductionCallback < ompt_set_always) {
        // We ignore writes inside the barrier. These would either occur during
        // 1. reductions performed by the runtime which are guaranteed to be
        // race-free.
        // 2. execution of another task.
        // For the latter case we will re-enable tracking in task_switch.
        Data->InBarrier = true;
        TsanIgnoreWritesBegin();
      }

      break;
    }

    case ompt_sync_region_taskwait:
      break;

    case ompt_sync_region_taskgroup:
      // Entering a taskgroup pushes a new entry on the taskgroup stack.
      Data->TaskGroup = Taskgroup::New(Data->TaskGroup);
      break;

    case ompt_sync_region_reduction:
      // should never be reached
      break;
    }
    if (endpoint == ompt_scope_begin)
      break;
    // ompt_scope_beginend: fall through into the end handling.
    KMP_FALLTHROUGH();
  case ompt_scope_end:
    TsanFuncExit();
    switch (kind) {
    case ompt_sync_region_barrier_implementation:
    case ompt_sync_region_barrier_implicit:
    case ompt_sync_region_barrier_explicit:
    case ompt_sync_region_barrier_implicit_parallel:
    case ompt_sync_region_barrier_implicit_workshare:
    case ompt_sync_region_barrier_teams:
    case ompt_sync_region_barrier: {
      if (hasReductionCallback < ompt_set_always) {
        // We want to track writes after the barrier again.
        Data->InBarrier = false;
        TsanIgnoreWritesEnd();
      }

      char BarrierIndex = Data->BarrierIndex;
      // Barrier will end after it has been entered by all threads.
      if (parallel_data)
        TsanHappensAfter(Data->Team->GetBarrierPtr(BarrierIndex));

      // It is not guaranteed that all threads have exited this barrier before
      // we enter the next one. So we will use a different address.
      // We are however guaranteed that this current barrier is finished
      // by the time we exit the next one. So we can then reuse the first
      // address.
      Data->BarrierIndex = (BarrierIndex + 1) % 2;
      break;
    }

    case ompt_sync_region_taskwait: {
      // Only synchronize if child tasks may actually have run (execution
      // counts task-creation phases).
      if (Data->execution > 1)
        TsanHappensAfter(Data->GetTaskwaitPtr());
      break;
    }

    case ompt_sync_region_taskgroup: {
      assert(Data->TaskGroup != nullptr &&
             "Should have at least one taskgroup!");

      TsanHappensAfter(Data->TaskGroup->GetPtr());

      // Delete this allocated taskgroup, all descendent task are finished by
      // now.
      Taskgroup *Parent = Data->TaskGroup->Parent;
      Data->TaskGroup->Delete();
      Data->TaskGroup = Parent;
      break;
    }

    case ompt_sync_region_reduction:
      // Should not occur according to OpenMP 5.1
      // Tested in OMPT tests
      break;
    }
    break;
  }
}
806
807static void ompt_tsan_reduction(ompt_sync_region_t kind,
808 ompt_scope_endpoint_t endpoint,
809 ompt_data_t *parallel_data,
810 ompt_data_t *task_data,
811 const void *codeptr_ra) {
812 switch (endpoint) {
813 case ompt_scope_begin:
814 switch (kind) {
815 case ompt_sync_region_reduction:
816 TsanIgnoreWritesBegin();
817 break;
818 default:
819 break;
820 }
821 break;
822 case ompt_scope_end:
823 switch (kind) {
824 case ompt_sync_region_reduction:
825 TsanIgnoreWritesEnd();
826 break;
827 default:
828 break;
829 }
830 break;
831 case ompt_scope_beginend:
832 // Should not occur according to OpenMP 5.1
833 // Tested in OMPT tests
834 // Would have no implications for DR detection
835 break;
836 }
837}
838
839/// OMPT event callbacks for handling tasks.
840
841static void ompt_tsan_task_create(
842 ompt_data_t *parent_task_data, /* id of parent task */
843 const ompt_frame_t *parent_frame, /* frame data for parent task */
844 ompt_data_t *new_task_data, /* id of created task */
845 int type, int has_dependences,
846 const void *codeptr_ra) /* pointer to outlined function */
847{
848 TaskData *Data;
849 assert(new_task_data->ptr == NULL &&
850 "Task data should be initialized to NULL");
851 if (type & ompt_task_initial) {
852 ompt_data_t *parallel_data;
853 int team_size = 1;
854 ompt_get_parallel_info(0, &parallel_data, &team_size);
855 ParallelData *PData = ParallelData::New(codeptr: nullptr);
856 parallel_data->ptr = PData;
857
858 Data = TaskData::New(team: PData, taskType: type);
859 new_task_data->ptr = Data;
860 } else if (type & ompt_task_undeferred) {
861 Data = TaskData::New(parent: ToTaskData(task_data: parent_task_data), taskType: type);
862 new_task_data->ptr = Data;
863 } else if (type & ompt_task_explicit || type & ompt_task_target) {
864 Data = TaskData::New(parent: ToTaskData(task_data: parent_task_data), taskType: type);
865 new_task_data->ptr = Data;
866
867 // Use the newly created address. We cannot use a single address from the
868 // parent because that would declare wrong relationships with other
869 // sibling tasks that may be created before this task is started!
870 TsanHappensBefore(Data->GetTaskPtr());
871 ToTaskData(task_data: parent_task_data)->execution++;
872 }
873}
874
875static void freeTask(TaskData *task) {
876 while (task != nullptr && --task->RefCount == 0) {
877 TaskData *Parent = task->Parent;
878 task->Delete();
879 task = Parent;
880 }
881}
882
883// LastAllMemoryPtr marks the beginning of an all_memory epoch
884// NextAllMemoryPtr marks the end of an all_memory epoch
885// All tasks with depend begin execution after LastAllMemoryPtr
886// and end before NextAllMemoryPtr
887static void releaseDependencies(TaskData *task) {
888 if (archer_flags->all_memory) {
889 if (task->hasAllMemoryDep()) {
890 TsanHappensBefore(task->Parent->GetLastAllMemoryPtr());
891 TsanHappensBefore(task->Parent->GetNextAllMemoryPtr());
892 } else if (task->DependencyCount)
893 TsanHappensBefore(task->Parent->GetNextAllMemoryPtr());
894 }
895 for (unsigned i = 0; i < task->DependencyCount; i++) {
896 task->Dependencies[i].AnnotateEnd();
897 }
898}
899
900static void acquireDependencies(TaskData *task) {
901 if (archer_flags->all_memory) {
902 if (task->hasAllMemoryDep())
903 TsanHappensAfter(task->Parent->GetNextAllMemoryPtr());
904 else if (task->DependencyCount)
905 TsanHappensAfter(task->Parent->GetLastAllMemoryPtr());
906 }
907 for (unsigned i = 0; i < task->DependencyCount; i++) {
908 task->Dependencies[i].AnnotateBegin();
909 }
910}
911
912static void completeTask(TaskData *FromTask) {
913 if (!FromTask)
914 return;
915 // Task-end happens after a possible omp_fulfill_event call
916 if (FromTask->isFulfilled())
917 TsanHappensAfter(FromTask->GetTaskPtr());
918 // Included tasks are executed sequentially, no need to track
919 // synchronization
920 if (!FromTask->isIncluded()) {
921 // Task will finish before a barrier in the surrounding parallel region
922 // ...
923 ParallelData *PData = FromTask->Team;
924 TsanHappensBefore(PData->GetBarrierPtr(FromTask->BarrierIndex));
925
926 // ... and before an eventual taskwait by the parent thread.
927 TsanHappensBefore(FromTask->Parent->GetTaskwaitPtr());
928
929 if (FromTask->TaskGroup != nullptr) {
930 // This task is part of a taskgroup, so it will finish before the
931 // corresponding taskgroup_end.
932 TsanHappensBefore(FromTask->TaskGroup->GetPtr());
933 }
934 }
935 // release dependencies
936 releaseDependencies(task: FromTask);
937}
938
939static void suspendTask(TaskData *FromTask) {
940 if (!FromTask)
941 return;
942 // Task may be resumed at a later point in time.
943 TsanHappensBefore(FromTask->GetTaskPtr());
944}
945
946static void switchTasks(TaskData *FromTask, TaskData *ToTask) {
947 // Legacy handling for missing reduction callback
948 if (hasReductionCallback < ompt_set_always) {
949 if (FromTask && FromTask->InBarrier) {
950 // We want to ignore writes in the runtime code during barriers,
951 // but not when executing tasks with user code!
952 TsanIgnoreWritesEnd();
953 }
954 if (ToTask && ToTask->InBarrier) {
955 // We want to ignore writes in the runtime code during barriers,
956 // but not when executing tasks with user code!
957 TsanIgnoreWritesBegin();
958 }
959 }
960 //// Not yet used
961 // if (FromTask)
962 // FromTask->deactivate();
963 // if (ToTask)
964 // ToTask->activate();
965}
966
// Hook for work to perform when a task's user code ends; currently a no-op
// beyond guarding against a null task.
static void endTask(TaskData *FromTask) {
  if (!FromTask)
    return;
}
971
972static void startTask(TaskData *ToTask) {
973 if (!ToTask)
974 return;
975 // Handle dependencies on first execution of the task
976 if (ToTask->execution == 0) {
977 ToTask->execution++;
978 acquireDependencies(task: ToTask);
979 }
980 // 1. Task will begin execution after it has been created.
981 // 2. Task will resume after it has been switched away.
982 TsanHappensAfter(ToTask->GetTaskPtr());
983}
984
/// Task scheduling points: dispatch the suspend/complete/free/start helpers
/// according to why the prior task left the thread.
static void ompt_tsan_task_schedule(ompt_data_t *first_task_data,
                                    ompt_task_status_t prior_task_status,
                                    ompt_data_t *second_task_data) {

  //
  // The necessary action depends on prior_task_status:
  //
  // ompt_task_early_fulfill = 5,
  // -> ignored
  //
  // ompt_task_late_fulfill = 6,
  // -> first completed, first freed, second ignored
  //
  // ompt_task_complete = 1,
  // ompt_task_cancel = 3,
  // -> first completed, first freed, second starts
  //
  // ompt_taskwait_complete = 8,
  // -> first starts, first completes, first freed, second ignored
  //
  // ompt_task_detach = 4,
  // ompt_task_yield = 2,
  // ompt_task_switch = 7
  // -> first suspended, second starts
  //

  TaskData *FromTask = ToTaskData(first_task_data);
  TaskData *ToTask = ToTaskData(second_task_data);

  switch (prior_task_status) {
  case ompt_task_early_fulfill:
    // omp_fulfill_event before task end: remember the fulfill point.
    TsanHappensBefore(FromTask->GetTaskPtr());
    FromTask->setFulfilled();
    return;
  case ompt_task_late_fulfill:
    // omp_fulfill_event after task end: synchronize and release the task.
    TsanHappensAfter(FromTask->GetTaskPtr());
    completeTask(FromTask);
    freeTask(FromTask);
    return;
  case ompt_taskwait_complete:
    acquireDependencies(FromTask);
    freeTask(FromTask);
    return;
  case ompt_task_complete:
    // NOTE(review): unlike ompt_task_cancel below, this path does not call
    // startTask(ToTask) although the comment above says "second starts" for
    // both — confirm this asymmetry is intentional.
    completeTask(FromTask);
    endTask(FromTask);
    switchTasks(FromTask, ToTask);
    freeTask(FromTask);
    return;
  case ompt_task_cancel:
    completeTask(FromTask);
    endTask(FromTask);
    switchTasks(FromTask, ToTask);
    freeTask(FromTask);
    startTask(ToTask);
    return;
  case ompt_task_detach:
    endTask(FromTask);
    suspendTask(FromTask);
    switchTasks(FromTask, ToTask);
    startTask(ToTask);
    return;
  case ompt_task_yield:
    suspendTask(FromTask);
    switchTasks(FromTask, ToTask);
    startTask(ToTask);
    return;
  case ompt_task_switch:
    suspendTask(FromTask);
    switchTasks(FromTask, ToTask);
    startTask(ToTask);
    return;
  }
}
1059
1060static void ompt_tsan_dependences(ompt_data_t *task_data,
1061 const ompt_dependence_t *deps, int ndeps) {
1062 if (ndeps > 0) {
1063 // Copy the data to use it in task_switch and task_end.
1064 TaskData *Data = ToTaskData(task_data);
1065 if (!Data->Parent) {
1066 // Return since doacross dependences are not supported yet.
1067 return;
1068 }
1069 if (!Data->Parent->DependencyMap)
1070 Data->Parent->DependencyMap =
1071 new std::unordered_map<void *, DependencyData *>();
1072 Data->Dependencies =
1073 (TaskDependency *)malloc(size: sizeof(TaskDependency) * ndeps);
1074 Data->DependencyCount = ndeps;
1075 for (int i = 0, d = 0; i < ndeps; i++, d++) {
1076 if (deps[i].dependence_type == ompt_dependence_type_out_all_memory ||
1077 deps[i].dependence_type == ompt_dependence_type_inout_all_memory) {
1078 Data->setAllMemoryDep();
1079 Data->DependencyCount--;
1080 if (!archer_flags->all_memory) {
1081 printf(format: "The application uses omp_all_memory, but Archer was\n"
1082 "started to not consider omp_all_memory. This can lead\n"
1083 "to false data race alerts.\n"
1084 "Include all_memory=1 in ARCHER_OPTIONS to consider\n"
1085 "omp_all_memory from the beginning.\n");
1086 archer_flags->all_memory = 1;
1087 }
1088 d--;
1089 continue;
1090 }
1091 auto ret = Data->Parent->DependencyMap->insert(
1092 x: std::make_pair(x: deps[i].variable.ptr, y: nullptr));
1093 if (ret.second) {
1094 ret.first->second = DependencyData::New();
1095 }
1096 new ((void *)(Data->Dependencies + d))
1097 TaskDependency(ret.first->second, deps[i].dependence_type);
1098 }
1099
1100 // This callback is executed before this task is first started.
1101 TsanHappensBefore(Data->GetTaskPtr());
1102 }
1103}
1104
1105/// OMPT event callbacks for handling locking.
1106static void ompt_tsan_mutex_acquired(ompt_mutex_t kind, ompt_wait_id_t wait_id,
1107 const void *codeptr_ra) {
1108
1109 // Acquire our own lock to make sure that
1110 // 1. the previous release has finished.
1111 // 2. the next acquire doesn't start before we have finished our release.
1112 LocksMutex.lock();
1113 std::mutex &Lock = Locks[wait_id];
1114 LocksMutex.unlock();
1115
1116 Lock.lock();
1117 TsanHappensAfter(&Lock);
1118}
1119
1120static void ompt_tsan_mutex_released(ompt_mutex_t kind, ompt_wait_id_t wait_id,
1121 const void *codeptr_ra) {
1122 LocksMutex.lock();
1123 std::mutex &Lock = Locks[wait_id];
1124 LocksMutex.unlock();
1125 TsanHappensBefore(&Lock);
1126
1127 Lock.unlock();
1128}
1129
// callback , signature , variable to store result , required support level
// Registers an OMPT callback whose handler type differs from the event name;
// warns if the runtime supports it below the requested level.
#define SET_OPTIONAL_CALLBACK_T(event, type, result, level)                    \
  do {                                                                         \
    ompt_callback_##type##_t tsan_##event = &ompt_tsan_##event;                \
    result = ompt_set_callback(ompt_callback_##event,                          \
                               (ompt_callback_t)tsan_##event);                 \
    if (result < level)                                                        \
      printf("Registered callback '" #event "' is not supported at " #level   \
             " (%i)\n",                                                        \
             result);                                                          \
  } while (0)

// Same as above, but requires ompt_set_always support.
#define SET_CALLBACK_T(event, type)                                            \
  do {                                                                         \
    int res;                                                                   \
    SET_OPTIONAL_CALLBACK_T(event, type, res, ompt_set_always);                \
  } while (0)

// Common case: callback type name matches the event name.
#define SET_CALLBACK(event) SET_CALLBACK_T(event, event)

// Resolves a TSan interface function via dlsym, warning when it is absent.
#define findTsanFunction(f, fSig)                                              \
  do {                                                                         \
    void *fp = dlsym(RTLD_DEFAULT, #f);                                        \
    if (fp)                                                                    \
      f = fSig fp;                                                             \
    else                                                                       \
      printf("Unable to find TSan function " #f ".\n");                        \
  } while (0)

// Like findTsanFunction, but without the warning (used for feature probing).
#define findTsanFunctionSilent(f, fSig) f = fSig dlsym(RTLD_DEFAULT, #f)
1160
1161static int ompt_tsan_initialize(ompt_function_lookup_t lookup, int device_num,
1162 ompt_data_t *tool_data) {
1163 const char *options = getenv(name: "TSAN_OPTIONS");
1164 TsanFlags tsan_flags(options);
1165
1166 ompt_set_callback_t ompt_set_callback =
1167 (ompt_set_callback_t)lookup("ompt_set_callback");
1168 if (ompt_set_callback == NULL) {
1169 std::cerr << "Could not set callback, exiting..." << std::endl;
1170 std::exit(status: 1);
1171 }
1172 ompt_get_parallel_info =
1173 (ompt_get_parallel_info_t)lookup("ompt_get_parallel_info");
1174 ompt_get_thread_data = (ompt_get_thread_data_t)lookup("ompt_get_thread_data");
1175
1176 if (ompt_get_parallel_info == NULL) {
1177 fprintf(stderr, format: "Could not get inquiry function 'ompt_get_parallel_info', "
1178 "exiting...\n");
1179 exit(status: 1);
1180 }
1181
1182 findTsanFunction(AnnotateHappensAfter,
1183 (void (*)(const char *, int, const volatile void *)));
1184 findTsanFunction(AnnotateHappensBefore,
1185 (void (*)(const char *, int, const volatile void *)));
1186 findTsanFunction(AnnotateIgnoreWritesBegin, (void (*)(const char *, int)));
1187 findTsanFunction(AnnotateIgnoreWritesEnd, (void (*)(const char *, int)));
1188 findTsanFunction(
1189 AnnotateNewMemory,
1190 (void (*)(const char *, int, const volatile void *, size_t)));
1191 findTsanFunction(__tsan_func_entry, (void (*)(const void *)));
1192 findTsanFunction(__tsan_func_exit, (void (*)(void)));
1193
1194 SET_CALLBACK(thread_begin);
1195 SET_CALLBACK(thread_end);
1196 SET_CALLBACK(parallel_begin);
1197 SET_CALLBACK(implicit_task);
1198 SET_CALLBACK(sync_region);
1199 SET_CALLBACK(parallel_end);
1200
1201 SET_CALLBACK(task_create);
1202 SET_CALLBACK(task_schedule);
1203 SET_CALLBACK(dependences);
1204
1205 SET_CALLBACK_T(mutex_acquired, mutex);
1206 SET_CALLBACK_T(mutex_released, mutex);
1207 SET_OPTIONAL_CALLBACK_T(reduction, sync_region, hasReductionCallback,
1208 ompt_set_never);
1209
1210 if (!tsan_flags.ignore_noninstrumented_modules)
1211 fprintf(stderr,
1212 format: "Warning: please export "
1213 "TSAN_OPTIONS='ignore_noninstrumented_modules=1' "
1214 "to avoid false positive reports from the OpenMP runtime!\n");
1215 if (archer_flags->ignore_serial)
1216 TsanIgnoreWritesBegin();
1217
1218 return 1; // success
1219}
1220
1221static void ompt_tsan_finalize(ompt_data_t *tool_data) {
1222 if (archer_flags->ignore_serial)
1223 TsanIgnoreWritesEnd();
1224 if (archer_flags->print_max_rss) {
1225 struct rusage end;
1226 getrusage(RUSAGE_SELF, usage: &end);
1227 printf(format: "MAX RSS[KiB] during execution: %ld\n", end.ru_maxrss);
1228 }
1229
1230 if (archer_flags)
1231 delete archer_flags;
1232}
1233
1234extern "C" ompt_start_tool_result_t *
1235ompt_start_tool(unsigned int omp_version, const char *runtime_version) {
1236 const char *options = getenv(name: "ARCHER_OPTIONS");
1237 archer_flags = new ArcherFlags(options);
1238 if (!archer_flags->enabled) {
1239 if (archer_flags->verbose)
1240 std::cout << "Archer disabled, stopping operation" << std::endl;
1241 delete archer_flags;
1242 return NULL;
1243 }
1244
1245 pagesize = getpagesize();
1246
1247 static ompt_start_tool_result_t ompt_start_tool_result = {
1248 .initialize: &ompt_tsan_initialize, .finalize: &ompt_tsan_finalize, .tool_data: {.value: 0}};
1249
1250 // The OMPT start-up code uses dlopen with RTLD_LAZY. Therefore, we cannot
1251 // rely on dlopen to fail if TSan is missing, but would get a runtime error
1252 // for the first TSan call. We use __tsan_init to detect whether
1253 // an implementation of the Annotation interface is available in the
1254 // execution or disable the tool (by returning NULL).
1255
1256 void (*__tsan_init)(void) = nullptr;
1257
1258 findTsanFunctionSilent(__tsan_init, (void (*)(void)));
1259 if (!__tsan_init) // if we are not running on TSAN, give a different
1260 // tool the chance to be loaded
1261 {
1262 if (archer_flags->verbose)
1263 std::cout << "Archer detected OpenMP application without TSan; "
1264 "stopping operation"
1265 << std::endl;
1266 delete archer_flags;
1267 return NULL;
1268 }
1269
1270 if (archer_flags->verbose)
1271 std::cout << "Archer detected OpenMP application with TSan, supplying "
1272 "OpenMP synchronization semantics"
1273 << std::endl;
1274 return &ompt_start_tool_result;
1275}
1276

// End of ompt-tsan.cpp (openmp/tools/archer/ompt-tsan.cpp).
// NOTE(review): trailing web-page footer text from the code-browser scrape
// ("Provided by KDAB", privacy/advertising links) was not part of the source
// and has been removed; it is not valid C++.