1 | // RUN: %clangxx_nsan -O0 -mllvm -nsan-shadow-type-mapping=dqq -g -DSUM=NaiveSum -DFLT=float %s -o %t |
2 | // RUN: env NSAN_OPTIONS=halt_on_error=1,log2_max_relative_error=19 not %run %t 2>&1 | FileCheck %s |
3 | |
4 | // RUN: %clangxx_nsan -O3 -mllvm -nsan-shadow-type-mapping=dqq -g -DSUM=NaiveSum -DFLT=float %s -o %t |
5 | // RUN: env NSAN_OPTIONS=halt_on_error=1,log2_max_relative_error=19 not %run %t 2>&1 | FileCheck %s |
6 | |
7 | // RUN: %clangxx_nsan -O0 -mllvm -nsan-shadow-type-mapping=dqq -g -DSUM=KahanSum -DFLT=float %s -o %t |
8 | // RUN: env NSAN_OPTIONS=halt_on_error=1,log2_max_relative_error=19 %run %t |
9 | |
10 | // RUN: %clangxx_nsan -O3 -mllvm -nsan-shadow-type-mapping=dqq -g -DSUM=KahanSum -DFLT=float %s -o %t |
11 | // RUN: env NSAN_OPTIONS=halt_on_error=1,log2_max_relative_error=19 %run %t |
12 | |
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <iostream>
#include <random>
#include <vector>
17 | |
// Straightforward left-to-right accumulation in T with no error compensation.
// This is the numerically unstable counterpart of KahanSum below.
template <typename T>
__attribute__((noinline)) // Stays out of line so the reported stack has a frame for it.
T NaiveSum(const std::vector<T>& values) {
  T total = 0;
  // The additions are performed directly in this function (not delegated to a
  // library algorithm) so that the value returned from this frame is the one
  // carrying the accumulated error.
  for (auto it = values.begin(), last = values.end(); it != last; ++it) {
    total += *it;
  }
  return total;
  // CHECK: WARNING: NumericalStabilitySanitizer: inconsistent shadow results while checking return
  // CHECK: float{{ *}}precision (native):
  // CHECK: double{{ *}}precision (shadow):
  // CHECK: {{#0 .*in .* NaiveSum}}
}
32 | |
// Compensated (Kahan) summation: a running correction term captures the
// low-order part lost by each addition and feeds it back into the next one,
// which makes the sum numerically stable.
// https://en.wikipedia.org/wiki/Kahan_summation_algorithm
template <typename T>
__attribute__((noinline)) T KahanSum(const std::vector<T> &values) {
  T total = 0;
  T compensation = 0;
  for (const T &term : values) {
    const T corrected = term - compensation;
    const T next = total + corrected;
    // Keep this exact grouping: (next - total) is what was really added, and
    // subtracting `corrected` from it leaves the rounding error of the step.
    compensation = (next - total) - corrected;
    total = next;
  }
  return total;
}
47 | |
48 | int main() { |
49 | std::vector<FLT> values; |
50 | constexpr int kNumValues = 1000000; |
51 | values.reserve(kNumValues); |
52 | // Using a seed to avoid flakiness. |
53 | constexpr uint32_t kSeed = 0x123456; |
54 | std::mt19937 gen(kSeed); |
55 | std::uniform_real_distribution<FLT> dis(0.0f, 1000.0f); |
56 | for (int i = 0; i < kNumValues; ++i) { |
57 | values.push_back(dis(gen)); |
58 | } |
59 | |
60 | const auto t1 = std::chrono::high_resolution_clock::now(); |
61 | const auto sum = SUM(values); |
62 | const auto t2 = std::chrono::high_resolution_clock::now(); |
63 | printf("sum: %.8f\n" , sum); |
64 | std::cout << "runtime: " |
65 | << std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1) |
66 | .count() / |
67 | 1000.0 |
68 | << "ms\n" ; |
69 | return 0; |
70 | } |
71 | |