// SPDX-License-Identifier: GPL-2.0
#define CREATE_TRACE_POINTS
#include <trace/events/mmap_lock.h>

#include <linux/mm.h>
#include <linux/cgroup.h>
#include <linux/memcontrol.h>
#include <linux/mmap_lock.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/smp.h>
#include <linux/trace_events.h>
#include <linux/local_lock.h>

EXPORT_TRACEPOINT_SYMBOL(mmap_lock_start_locking);
EXPORT_TRACEPOINT_SYMBOL(mmap_lock_acquire_returned);
EXPORT_TRACEPOINT_SYMBOL(mmap_lock_released);

#ifdef CONFIG_MEMCG

/*
 * Our various events all share the same buffer (because we don't want or need
 * to allocate a set of buffers *per event type*), so we need to protect against
 * concurrent _reg() and _unreg() calls, and count how many _reg() calls have
 * been made.
 */
static DEFINE_MUTEX(reg_lock);
static int reg_refcount; /* Protected by reg_lock. */
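
/*
 * Note: _reg()/_unreg() are registration callbacks wired up to the
 * mmap_lock trace events (see trace/events/mmap_lock.h), so the tracing
 * core invokes them as the events are enabled and disabled.
 */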

/*
 * Size of the buffer for memcg path names. Ignoring stack trace support,
 * trace_events_hist.c uses MAX_FILTER_STR_VAL for this, so we also use it.
 */
#define MEMCG_PATH_BUF_SIZE MAX_FILTER_STR_VAL

/*
 * How many contexts our trace events might be called in: normal, softirq, irq,
 * and NMI.
 */
#define CONTEXT_COUNT 4

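/*
 * Per-CPU scratch state for building memcg path strings:
 *
 *   - @buf is an RCU-managed buffer of CONTEXT_COUNT slices of
 *     MEMCG_PATH_BUF_SIZE bytes each, one slice per context a trace
 *     event can fire in (task, softirq, irq, NMI).
 *   - @buf_idx is a bump index into @buf; nested contexts each claim
 *     the next slice and release it in LIFO order on the way out.
 *   - @lock keeps a task pinned to the CPU (and away from other users
 *     of @buf_idx) for the duration of a get/put pair.
 */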
struct memcg_path {
	local_lock_t lock;
	char __rcu *buf;
	local_t buf_idx;
};
static DEFINE_PER_CPU(struct memcg_path, memcg_paths) = {
	.lock = INIT_LOCAL_LOCK(lock),
	.buf_idx = LOCAL_INIT(0),
};

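/*
 * Staging area for the old per-CPU buffer pointers while they are torn
 * down: the pointers are collected here, published as NULL to readers,
 * and only kfree()d once a synchronize_rcu() guarantees no reader still
 * holds one. Allocated in trace_mmap_lock_reg(), one slot per possible
 * CPU.
 */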
static char **tmp_bufs;

/* Called with reg_lock held. */
static void free_memcg_path_bufs(void)
{
	struct memcg_path *memcg_path;
	int cpu;
	char **old = tmp_bufs;

	for_each_possible_cpu(cpu) {
		memcg_path = per_cpu_ptr(&memcg_paths, cpu);
		*(old++) = rcu_dereference_protected(memcg_path->buf,
				lockdep_is_held(&reg_lock));
		rcu_assign_pointer(memcg_path->buf, NULL);
	}

	/* Wait for inflight memcg_path_buf users to finish. */
	synchronize_rcu();

	old = tmp_bufs;
	for_each_possible_cpu(cpu) {
		kfree(*(old++));
	}

	kfree(tmp_bufs);
	tmp_bufs = NULL;
}

int trace_mmap_lock_reg(void)
{
	int cpu;
	char *new;

	mutex_lock(&reg_lock);

	/* If the refcount is going 0->1, proceed with allocating buffers. */
	if (reg_refcount++)
		goto out;

	tmp_bufs = kmalloc_array(num_possible_cpus(), sizeof(*tmp_bufs),
				 GFP_KERNEL);
	if (tmp_bufs == NULL)
		goto out_fail;

	for_each_possible_cpu(cpu) {
		/* One MEMCG_PATH_BUF_SIZE slice per context (see CONTEXT_COUNT). */
		new = kmalloc(MEMCG_PATH_BUF_SIZE * CONTEXT_COUNT, GFP_KERNEL);
		if (new == NULL)
			goto out_fail_free;
		rcu_assign_pointer(per_cpu_ptr(&memcg_paths, cpu)->buf, new);
		/* Don't need to wait for inflights, they'd have gotten NULL. */
	}

out:
	mutex_unlock(&reg_lock);
	return 0;

out_fail_free:
	free_memcg_path_bufs();
out_fail:
	/* Since we failed, undo the earlier ref increment. */
	--reg_refcount;

	mutex_unlock(&reg_lock);
	return -ENOMEM;
}

void trace_mmap_lock_unreg(void)
{
	mutex_lock(&reg_lock);

	/* If the refcount is going 1->0, proceed with freeing buffers. */
	if (--reg_refcount)
		goto out;

	free_memcg_path_bufs();

out:
	mutex_unlock(&reg_lock);
}

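/*
 * Reserve the next free MEMCG_PATH_BUF_SIZE slice of this CPU's buffer
 * and return a pointer to it, or NULL if the events are being
 * unregistered. Slices are handed out bump-allocator style: the
 * outermost caller on a CPU gets offset 0; if it is interrupted by a
 * softirq whose event also needs a buffer, that nested caller gets
 * offset MEMCG_PATH_BUF_SIZE, and so on. Since at most CONTEXT_COUNT
 * contexts can nest (task, softirq, irq, NMI) and put_memcg_path_buf()
 * releases slices in LIFO order, the index can never run past the end
 * of the buffer.
 */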
static inline char *get_memcg_path_buf(void)
{
	struct memcg_path *memcg_path = this_cpu_ptr(&memcg_paths);
	char *buf;
	int idx;

	rcu_read_lock();
	buf = rcu_dereference(memcg_path->buf);
	if (buf == NULL) {
		rcu_read_unlock();
		return NULL;
	}
	idx = local_add_return(MEMCG_PATH_BUF_SIZE, &memcg_path->buf_idx) -
	      MEMCG_PATH_BUF_SIZE;
	return &buf[idx];
}

static inline void put_memcg_path_buf(void)
{
	local_sub(MEMCG_PATH_BUF_SIZE, &this_cpu_ptr(&memcg_paths)->buf_idx);
	rcu_read_unlock();
}

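/*
 * Emit the trace event named mmap_lock_##type with the mm's memcg path
 * attached. The local_lock keeps us on one CPU for the whole event, so
 * the get/put pair below operates on the same per-CPU state; if no
 * buffer is available, the event is still emitted, with an empty path.
 */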
#define TRACE_MMAP_LOCK_EVENT(type, mm, ...)                                   \
	do {                                                                   \
		const char *memcg_path;                                        \
		local_lock(&memcg_paths.lock);                                 \
		memcg_path = get_mm_memcg_path(mm);                            \
		trace_mmap_lock_##type(mm,                                     \
				       memcg_path != NULL ? memcg_path : "",   \
				       ##__VA_ARGS__);                         \
		if (likely(memcg_path != NULL))                                \
			put_memcg_path_buf();                                  \
		local_unlock(&memcg_paths.lock);                               \
	} while (0)

#else /* !CONFIG_MEMCG */

int trace_mmap_lock_reg(void)
{
	return 0;
}

void trace_mmap_lock_unreg(void)
{
}

#define TRACE_MMAP_LOCK_EVENT(type, mm, ...)                                   \
	trace_mmap_lock_##type(mm, "", ##__VA_ARGS__)

#endif /* CONFIG_MEMCG */

#ifdef CONFIG_TRACING
#ifdef CONFIG_MEMCG
/*
 * Write the given mm_struct's memcg path to a percpu buffer, and return a
 * pointer to it. If the path cannot be determined, or no buffer was available
 * (because the trace event is being unregistered), NULL is returned.
 *
 * Note: buffers are allocated per-cpu to avoid locking, so preemption must be
 * disabled by the caller before calling us, and re-enabled only after the
 * caller is done with the pointer.
 *
 * The caller must call put_memcg_path_buf() once the buffer is no longer
 * needed. This must be done while preemption is still disabled.
 */
static const char *get_mm_memcg_path(struct mm_struct *mm)
{
	char *buf = NULL;
	struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm);

	if (memcg == NULL)
		goto out;
	if (unlikely(memcg->css.cgroup == NULL))
		goto out_put;

	buf = get_memcg_path_buf();
	if (buf == NULL)
		goto out_put;

	cgroup_path(memcg->css.cgroup, buf, MEMCG_PATH_BUF_SIZE);

out_put:
	css_put(&memcg->css);
out:
	return buf;
}

#endif /* CONFIG_MEMCG */

/*
 * Trace calls must be in a separate file, as otherwise there's a circular
 * dependency between linux/mmap_lock.h and trace/events/mmap_lock.h.
 */

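/*
 * These hooks are reached from the mmap_lock wrapper functions in
 * <linux/mmap_lock.h>, which check whether the corresponding tracepoint
 * is enabled before calling in here. Roughly (a simplified sketch, not
 * the exact header source):
 *
 *	static inline void mmap_write_lock(struct mm_struct *mm)
 *	{
 *		__mmap_lock_trace_start_locking(mm, true);
 *		down_write(&mm->mmap_lock);
 *		__mmap_lock_trace_acquire_returned(mm, true, true);
 *	}
 */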
void __mmap_lock_do_trace_start_locking(struct mm_struct *mm, bool write)
{
	TRACE_MMAP_LOCK_EVENT(start_locking, mm, write);
}
EXPORT_SYMBOL(__mmap_lock_do_trace_start_locking);

void __mmap_lock_do_trace_acquire_returned(struct mm_struct *mm, bool write,
					   bool success)
{
	TRACE_MMAP_LOCK_EVENT(acquire_returned, mm, write, success);
}
EXPORT_SYMBOL(__mmap_lock_do_trace_acquire_returned);

void __mmap_lock_do_trace_released(struct mm_struct *mm, bool write)
{
	TRACE_MMAP_LOCK_EVENT(released, mm, write);
}
EXPORT_SYMBOL(__mmap_lock_do_trace_released);
#endif /* CONFIG_TRACING */