| 1 | // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) |
| 2 | // Copyright (c) 2023 Google |
| 3 | #include "vmlinux.h" |
| 4 | #include <bpf/bpf_helpers.h> |
| 5 | #include <bpf/bpf_tracing.h> |
| 6 | #include <bpf/bpf_core_read.h> |
| 7 | |
| 8 | #include "sample-filter.h" |
| 9 | |
/*
 * BPF map that will be filled by user space.
 *
 * Each value is an array of MAX_FILTERS filter entries.  The program looks
 * the array up with an int key: 0 by default, or the index obtained from
 * idx_hash when use_idx_hash is set.
 */
struct filters {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, int);
	__type(value, struct perf_bpf_filter_entry[MAX_FILTERS]);
	__uint(max_entries, 1);
} filters SEC(".maps");
| 17 | |
/*
 * An evsel has multiple instances for each CPU or task but we need a single
 * id to be used as a key for the idx_hash.  This hashmap translates the
 * instance's ID to a representative ID.
 *
 * key:   event id (the parent's id when the event has a parent)
 * value: id stored into idx_hash_key.evt_id for the idx_hash lookup
 */
struct event_hash {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, __u64);
	__type(value, __u64);
	__uint(max_entries, 1);
} event_hash SEC(".maps");
| 29 | |
/*
 * tgid/evtid to filter index.
 *
 * The int value is used as the key into the filters map (and into the
 * dropped map when a sample is rejected).
 */
struct idx_hash {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, struct idx_hash_key);
	__type(value, int);
	__uint(max_entries, 1);
} idx_hash SEC(".maps");
| 37 | |
/*
 * Filter index to number of dropped samples.
 *
 * perf_sample_filter() atomically increments the slot for the current filter
 * index every time it rejects a sample.
 */
struct lost_count {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, int);
	__type(value, int);
	__uint(max_entries, 1);
} dropped SEC(".maps");
| 45 | |
/*
 * When non-zero, perf_sample_filter() resolves the filter index through
 * event_hash and idx_hash instead of using index 0.
 * NOTE(review): presumably set by user space before the program is loaded --
 * confirm against the loader.
 */
volatile const int use_idx_hash;

/* Kernel symbol used to turn the program's ctx into the kernel-side context. */
void *bpf_cast_to_kern_ctx(void *) __ksym;
| 49 | |
/*
 * new kernel perf_sample_data definition
 *
 * Only sample_flags is declared; perf_get_sample() uses this type to test
 * whether that field exists before reading any sample data.
 */
struct perf_sample_data___new {
	__u64 sample_flags;
} __attribute__((preserve_access_index));
| 54 | |
/*
 * new kernel perf_mem_data_src definition
 *
 * perf_get_sample() casts the sample's data_src to this type so it can test
 * for, and read, the mem_hops bitfield.
 */
union perf_mem_data_src___new {
	__u64 val;
	struct {
		__u64   mem_op:5,	/* type of opcode */
			mem_lvl:14,	/* memory hierarchy level */
			mem_snoop:5,	/* snoop mode */
			mem_lock:2,	/* lock instr */
			mem_dtlb:7,	/* tlb access */
			mem_lvl_num:4,	/* memory hierarchy level number */
			mem_remote:1,	/* remote */
			mem_snoopx:2,	/* snoop mode, ext */
			mem_blk:3,	/* access blocked */
			mem_hops:3,	/* hop level */
			mem_rsvd:18;
	};
};
| 72 | |
| 73 | /* helper function to return the given perf sample data */ |
| 74 | static inline __u64 perf_get_sample(struct bpf_perf_event_data_kern *kctx, |
| 75 | struct perf_bpf_filter_entry *entry) |
| 76 | { |
| 77 | struct perf_sample_data___new *data = (void *)kctx->data; |
| 78 | |
| 79 | if (!bpf_core_field_exists(data->sample_flags)) |
| 80 | return 0; |
| 81 | |
| 82 | #define BUILD_CHECK_SAMPLE(x) \ |
| 83 | _Static_assert((1 << (PBF_TERM_##x - PBF_TERM_SAMPLE_START)) == PERF_SAMPLE_##x, \ |
| 84 | "Mismatched PBF term to sample bit " #x) |
| 85 | BUILD_CHECK_SAMPLE(IP); |
| 86 | BUILD_CHECK_SAMPLE(TID); |
| 87 | BUILD_CHECK_SAMPLE(TIME); |
| 88 | BUILD_CHECK_SAMPLE(ADDR); |
| 89 | BUILD_CHECK_SAMPLE(ID); |
| 90 | BUILD_CHECK_SAMPLE(CPU); |
| 91 | BUILD_CHECK_SAMPLE(PERIOD); |
| 92 | BUILD_CHECK_SAMPLE(WEIGHT); |
| 93 | BUILD_CHECK_SAMPLE(DATA_SRC); |
| 94 | BUILD_CHECK_SAMPLE(TRANSACTION); |
| 95 | BUILD_CHECK_SAMPLE(PHYS_ADDR); |
| 96 | BUILD_CHECK_SAMPLE(CGROUP); |
| 97 | BUILD_CHECK_SAMPLE(DATA_PAGE_SIZE); |
| 98 | BUILD_CHECK_SAMPLE(CODE_PAGE_SIZE); |
| 99 | BUILD_CHECK_SAMPLE(WEIGHT_STRUCT); |
| 100 | #undef BUILD_CHECK_SAMPLE |
| 101 | |
| 102 | /* For sample terms check the sample bit is set. */ |
| 103 | if (entry->term >= PBF_TERM_SAMPLE_START && entry->term <= PBF_TERM_SAMPLE_END && |
| 104 | (data->sample_flags & (1 << (entry->term - PBF_TERM_SAMPLE_START))) == 0) |
| 105 | return 0; |
| 106 | |
| 107 | switch (entry->term) { |
| 108 | case PBF_TERM_IP: |
| 109 | return kctx->data->ip; |
| 110 | case PBF_TERM_ID: |
| 111 | return kctx->data->id; |
| 112 | case PBF_TERM_TID: |
| 113 | if (entry->part) |
| 114 | return kctx->data->tid_entry.pid; |
| 115 | else |
| 116 | return kctx->data->tid_entry.tid; |
| 117 | case PBF_TERM_CPU: |
| 118 | return kctx->data->cpu_entry.cpu; |
| 119 | case PBF_TERM_TIME: |
| 120 | return kctx->data->time; |
| 121 | case PBF_TERM_ADDR: |
| 122 | return kctx->data->addr; |
| 123 | case PBF_TERM_PERIOD: |
| 124 | return kctx->data->period; |
| 125 | case PBF_TERM_TRANSACTION: |
| 126 | return kctx->data->txn; |
| 127 | case PBF_TERM_WEIGHT_STRUCT: |
| 128 | if (entry->part == 1) |
| 129 | return kctx->data->weight.var1_dw; |
| 130 | if (entry->part == 2) |
| 131 | return kctx->data->weight.var2_w; |
| 132 | if (entry->part == 3) |
| 133 | return kctx->data->weight.var3_w; |
| 134 | /* fall through */ |
| 135 | case PBF_TERM_WEIGHT: |
| 136 | return kctx->data->weight.full; |
| 137 | case PBF_TERM_PHYS_ADDR: |
| 138 | return kctx->data->phys_addr; |
| 139 | case PBF_TERM_CGROUP: |
| 140 | return kctx->data->cgroup; |
| 141 | case PBF_TERM_CODE_PAGE_SIZE: |
| 142 | return kctx->data->code_page_size; |
| 143 | case PBF_TERM_DATA_PAGE_SIZE: |
| 144 | return kctx->data->data_page_size; |
| 145 | case PBF_TERM_DATA_SRC: |
| 146 | if (entry->part == 1) |
| 147 | return kctx->data->data_src.mem_op; |
| 148 | if (entry->part == 2) |
| 149 | return kctx->data->data_src.mem_lvl_num; |
| 150 | if (entry->part == 3) { |
| 151 | __u32 snoop = kctx->data->data_src.mem_snoop; |
| 152 | __u32 snoopx = kctx->data->data_src.mem_snoopx; |
| 153 | |
| 154 | return (snoopx << 5) | snoop; |
| 155 | } |
| 156 | if (entry->part == 4) |
| 157 | return kctx->data->data_src.mem_remote; |
| 158 | if (entry->part == 5) |
| 159 | return kctx->data->data_src.mem_lock; |
| 160 | if (entry->part == 6) |
| 161 | return kctx->data->data_src.mem_dtlb; |
| 162 | if (entry->part == 7) |
| 163 | return kctx->data->data_src.mem_blk; |
| 164 | if (entry->part == 8) { |
| 165 | union perf_mem_data_src___new *data = (void *)&kctx->data->data_src; |
| 166 | |
| 167 | if (__builtin_preserve_field_info(data->mem_hops, BPF_FIELD_EXISTS)) |
| 168 | return data->mem_hops; |
| 169 | |
| 170 | return 0; |
| 171 | } |
| 172 | /* return the whole word */ |
| 173 | return kctx->data->data_src.val; |
| 174 | case PBF_TERM_UID: |
| 175 | return bpf_get_current_uid_gid() & 0xFFFFFFFF; |
| 176 | case PBF_TERM_GID: |
| 177 | return bpf_get_current_uid_gid() >> 32; |
| 178 | case PBF_TERM_NONE: |
| 179 | case __PBF_UNUSED_TERM4: |
| 180 | case __PBF_UNUSED_TERM5: |
| 181 | case __PBF_UNUSED_TERM9: |
| 182 | case __PBF_UNUSED_TERM10: |
| 183 | case __PBF_UNUSED_TERM11: |
| 184 | case __PBF_UNUSED_TERM12: |
| 185 | case __PBF_UNUSED_TERM13: |
| 186 | case __PBF_UNUSED_TERM16: |
| 187 | case __PBF_UNUSED_TERM18: |
| 188 | case __PBF_UNUSED_TERM20: |
| 189 | default: |
| 190 | break; |
| 191 | } |
| 192 | return 0; |
| 193 | } |
| 194 | |
/*
 * Evaluate one filter comparison, "data op val".
 *
 * The expansion refers to names that must exist in the enclosing function:
 * the ints in_group and group_result, and the label drop.
 *  - outside a group, a failed comparison jumps to drop;
 *  - inside a group, a failed comparison is ignored and a successful one
 *    sets group_result to 1.
 * The expansion is a complete if/else-if statement, so call sites do not add
 * a trailing semicolon.
 */
#define CHECK_RESULT(data, op, val)			\
	if (!(data op val)) {				\
		if (!in_group)				\
			goto drop;			\
	} else if (in_group) {				\
		group_result = 1;			\
	}
| 202 | |
| 203 | /* BPF program to be called from perf event overflow handler */ |
| 204 | SEC("perf_event" ) |
| 205 | int perf_sample_filter(void *ctx) |
| 206 | { |
| 207 | struct bpf_perf_event_data_kern *kctx; |
| 208 | struct perf_bpf_filter_entry *entry; |
| 209 | __u64 sample_data; |
| 210 | int in_group = 0; |
| 211 | int group_result = 0; |
| 212 | int i, k; |
| 213 | int *losts; |
| 214 | |
| 215 | kctx = bpf_cast_to_kern_ctx(ctx); |
| 216 | |
| 217 | k = 0; |
| 218 | |
| 219 | if (use_idx_hash) { |
| 220 | struct idx_hash_key key = { |
| 221 | .tgid = bpf_get_current_pid_tgid() >> 32, |
| 222 | }; |
| 223 | __u64 eid = kctx->event->id; |
| 224 | __u64 *key_id; |
| 225 | int *idx; |
| 226 | |
| 227 | /* get primary_event_id */ |
| 228 | if (kctx->event->parent) |
| 229 | eid = kctx->event->parent->id; |
| 230 | |
| 231 | key_id = bpf_map_lookup_elem(&event_hash, &eid); |
| 232 | if (key_id == NULL) |
| 233 | goto drop; |
| 234 | |
| 235 | key.evt_id = *key_id; |
| 236 | |
| 237 | idx = bpf_map_lookup_elem(&idx_hash, &key); |
| 238 | if (idx) |
| 239 | k = *idx; |
| 240 | else |
| 241 | goto drop; |
| 242 | } |
| 243 | |
| 244 | entry = bpf_map_lookup_elem(&filters, &k); |
| 245 | if (entry == NULL) |
| 246 | goto drop; |
| 247 | |
| 248 | for (i = 0; i < MAX_FILTERS; i++) { |
| 249 | sample_data = perf_get_sample(kctx, &entry[i]); |
| 250 | |
| 251 | switch (entry[i].op) { |
| 252 | case PBF_OP_EQ: |
| 253 | CHECK_RESULT(sample_data, ==, entry[i].value) |
| 254 | break; |
| 255 | case PBF_OP_NEQ: |
| 256 | CHECK_RESULT(sample_data, !=, entry[i].value) |
| 257 | break; |
| 258 | case PBF_OP_GT: |
| 259 | CHECK_RESULT(sample_data, >, entry[i].value) |
| 260 | break; |
| 261 | case PBF_OP_GE: |
| 262 | CHECK_RESULT(sample_data, >=, entry[i].value) |
| 263 | break; |
| 264 | case PBF_OP_LT: |
| 265 | CHECK_RESULT(sample_data, <, entry[i].value) |
| 266 | break; |
| 267 | case PBF_OP_LE: |
| 268 | CHECK_RESULT(sample_data, <=, entry[i].value) |
| 269 | break; |
| 270 | case PBF_OP_AND: |
| 271 | CHECK_RESULT(sample_data, &, entry[i].value) |
| 272 | break; |
| 273 | case PBF_OP_GROUP_BEGIN: |
| 274 | in_group = 1; |
| 275 | group_result = 0; |
| 276 | break; |
| 277 | case PBF_OP_GROUP_END: |
| 278 | if (group_result == 0) |
| 279 | goto drop; |
| 280 | in_group = 0; |
| 281 | break; |
| 282 | case PBF_OP_DONE: |
| 283 | /* no failures so far, accept it */ |
| 284 | return 1; |
| 285 | } |
| 286 | } |
| 287 | /* generate sample data */ |
| 288 | return 1; |
| 289 | |
| 290 | drop: |
| 291 | losts = bpf_map_lookup_elem(&dropped, &k); |
| 292 | if (losts != NULL) |
| 293 | __sync_fetch_and_add(losts, 1); |
| 294 | |
| 295 | return 0; |
| 296 | } |
| 297 | |
/* License string emitted into the "license" section of the object. */
char LICENSE[] SEC("license") = "Dual BSD/GPL";
| 299 | |