1 | // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB |
2 | /* Copyright (c) 2020 Mellanox Technologies. */ |
3 | |
4 | #include <linux/refcount.h> |
5 | #include <linux/list.h> |
6 | #include <linux/rculist.h> |
7 | #include <linux/rtnetlink.h> |
8 | #include <linux/workqueue.h> |
9 | #include <linux/spinlock.h> |
10 | #include <linux/notifier.h> |
11 | #include <net/netevent.h> |
12 | #include <net/arp.h> |
13 | #include "neigh.h" |
14 | #include "tc.h" |
15 | #include "en_rep.h" |
16 | #include "fs_core.h" |
17 | #include "diag/en_rep_tracepoint.h" |
18 | |
19 | static unsigned long mlx5e_rep_ipv6_interval(void) |
20 | { |
21 | if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl) |
22 | return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME); |
23 | |
24 | return ~0UL; |
25 | } |
26 | |
27 | static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv) |
28 | { |
29 | unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME); |
30 | unsigned long ipv6_interval = mlx5e_rep_ipv6_interval(); |
31 | struct net_device *netdev = rpriv->netdev; |
32 | struct mlx5e_priv *priv = netdev_priv(dev: netdev); |
33 | |
34 | rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval); |
35 | mlx5_fc_update_sampling_interval(dev: priv->mdev, interval: rpriv->neigh_update.min_interval); |
36 | } |
37 | |
38 | void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv) |
39 | { |
40 | struct mlx5e_rep_priv *rpriv = priv->ppriv; |
41 | struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; |
42 | |
43 | mlx5_fc_queue_stats_work(dev: priv->mdev, |
44 | dwork: &neigh_update->neigh_stats_work, |
45 | delay: neigh_update->min_interval); |
46 | } |
47 | |
48 | static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe) |
49 | { |
50 | return refcount_inc_not_zero(r: &nhe->refcnt); |
51 | } |
52 | |
53 | static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe); |
54 | |
55 | void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe) |
56 | { |
57 | if (refcount_dec_and_test(r: &nhe->refcnt)) { |
58 | mlx5e_rep_neigh_entry_remove(nhe); |
59 | kfree_rcu(nhe, rcu); |
60 | } |
61 | } |
62 | |
63 | static struct mlx5e_neigh_hash_entry * |
64 | mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv, |
65 | struct mlx5e_neigh_hash_entry *nhe) |
66 | { |
67 | struct mlx5e_neigh_hash_entry *next = NULL; |
68 | |
69 | rcu_read_lock(); |
70 | |
71 | for (next = nhe ? |
72 | list_next_or_null_rcu(&rpriv->neigh_update.neigh_list, |
73 | &nhe->neigh_list, |
74 | struct mlx5e_neigh_hash_entry, |
75 | neigh_list) : |
76 | list_first_or_null_rcu(&rpriv->neigh_update.neigh_list, |
77 | struct mlx5e_neigh_hash_entry, |
78 | neigh_list); |
79 | next; |
80 | next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list, |
81 | &next->neigh_list, |
82 | struct mlx5e_neigh_hash_entry, |
83 | neigh_list)) |
84 | if (mlx5e_rep_neigh_entry_hold(nhe: next)) |
85 | break; |
86 | |
87 | rcu_read_unlock(); |
88 | |
89 | if (nhe) |
90 | mlx5e_rep_neigh_entry_release(nhe); |
91 | |
92 | return next; |
93 | } |
94 | |
95 | static void mlx5e_rep_neigh_stats_work(struct work_struct *work) |
96 | { |
97 | struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv, |
98 | neigh_update.neigh_stats_work.work); |
99 | struct net_device *netdev = rpriv->netdev; |
100 | struct mlx5e_priv *priv = netdev_priv(dev: netdev); |
101 | struct mlx5e_neigh_hash_entry *nhe = NULL; |
102 | |
103 | rtnl_lock(); |
104 | if (!list_empty(head: &rpriv->neigh_update.neigh_list)) |
105 | mlx5e_rep_queue_neigh_stats_work(priv); |
106 | |
107 | while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL) |
108 | mlx5e_tc_update_neigh_used_value(nhe); |
109 | |
110 | rtnl_unlock(); |
111 | } |
112 | |
/* Deferred context for one NETEVENT_NEIGH_UPDATE event: carries a reference
 * on both the neighbour (taken via neigh_hold()) and the matching hash entry
 * until mlx5e_rep_neigh_update() runs and releases them.
 */
struct neigh_update_work {
	struct work_struct work;
	struct neighbour *n;			/* referenced neighbour */
	struct mlx5e_neigh_hash_entry *nhe;	/* referenced hash entry */
};
118 | |
119 | static void mlx5e_release_neigh_update_work(struct neigh_update_work *update_work) |
120 | { |
121 | neigh_release(neigh: update_work->n); |
122 | mlx5e_rep_neigh_entry_release(nhe: update_work->nhe); |
123 | kfree(objp: update_work); |
124 | } |
125 | |
126 | static void mlx5e_rep_neigh_update(struct work_struct *work) |
127 | { |
128 | struct neigh_update_work *update_work = container_of(work, struct neigh_update_work, |
129 | work); |
130 | struct mlx5e_neigh_hash_entry *nhe = update_work->nhe; |
131 | struct neighbour *n = update_work->n; |
132 | struct mlx5e_encap_entry *e = NULL; |
133 | bool neigh_connected, same_dev; |
134 | unsigned char ha[ETH_ALEN]; |
135 | u8 nud_state, dead; |
136 | |
137 | rtnl_lock(); |
138 | |
139 | /* If these parameters are changed after we release the lock, |
140 | * we'll receive another event letting us know about it. |
141 | * We use this lock to avoid inconsistency between the neigh validity |
142 | * and it's hw address. |
143 | */ |
144 | read_lock_bh(&n->lock); |
145 | memcpy(ha, n->ha, ETH_ALEN); |
146 | nud_state = n->nud_state; |
147 | dead = n->dead; |
148 | same_dev = READ_ONCE(nhe->neigh_dev) == n->dev; |
149 | read_unlock_bh(&n->lock); |
150 | |
151 | neigh_connected = (nud_state & NUD_VALID) && !dead; |
152 | |
153 | trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected); |
154 | |
155 | if (!same_dev) |
156 | goto out; |
157 | |
158 | /* mlx5e_get_next_init_encap() releases previous encap before returning |
159 | * the next one. |
160 | */ |
161 | while ((e = mlx5e_get_next_init_encap(nhe, e)) != NULL) |
162 | mlx5e_rep_update_flows(priv: netdev_priv(dev: e->out_dev), e, neigh_connected, ha); |
163 | |
164 | out: |
165 | rtnl_unlock(); |
166 | mlx5e_release_neigh_update_work(update_work); |
167 | } |
168 | |
169 | static struct neigh_update_work *mlx5e_alloc_neigh_update_work(struct mlx5e_priv *priv, |
170 | struct neighbour *n) |
171 | { |
172 | struct neigh_update_work *update_work; |
173 | struct mlx5e_neigh_hash_entry *nhe; |
174 | struct mlx5e_neigh m_neigh = {}; |
175 | |
176 | update_work = kzalloc(size: sizeof(*update_work), GFP_ATOMIC); |
177 | if (WARN_ON(!update_work)) |
178 | return NULL; |
179 | |
180 | m_neigh.family = n->ops->family; |
181 | memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len); |
182 | |
183 | /* Obtain reference to nhe as last step in order not to release it in |
184 | * atomic context. |
185 | */ |
186 | rcu_read_lock(); |
187 | nhe = mlx5e_rep_neigh_entry_lookup(priv, m_neigh: &m_neigh); |
188 | rcu_read_unlock(); |
189 | if (!nhe) { |
190 | kfree(objp: update_work); |
191 | return NULL; |
192 | } |
193 | |
194 | INIT_WORK(&update_work->work, mlx5e_rep_neigh_update); |
195 | neigh_hold(n); |
196 | update_work->n = n; |
197 | update_work->nhe = nhe; |
198 | |
199 | return update_work; |
200 | } |
201 | |
202 | static int mlx5e_rep_netevent_event(struct notifier_block *nb, |
203 | unsigned long event, void *ptr) |
204 | { |
205 | struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv, |
206 | neigh_update.netevent_nb); |
207 | struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; |
208 | struct net_device *netdev = rpriv->netdev; |
209 | struct mlx5e_priv *priv = netdev_priv(dev: netdev); |
210 | struct mlx5e_neigh_hash_entry *nhe = NULL; |
211 | struct neigh_update_work *update_work; |
212 | struct neigh_parms *p; |
213 | struct neighbour *n; |
214 | bool found = false; |
215 | |
216 | switch (event) { |
217 | case NETEVENT_NEIGH_UPDATE: |
218 | n = ptr; |
219 | #if IS_ENABLED(CONFIG_IPV6) |
220 | if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl) |
221 | #else |
222 | if (n->tbl != &arp_tbl) |
223 | #endif |
224 | return NOTIFY_DONE; |
225 | |
226 | update_work = mlx5e_alloc_neigh_update_work(priv, n); |
227 | if (!update_work) |
228 | return NOTIFY_DONE; |
229 | |
230 | queue_work(wq: priv->wq, work: &update_work->work); |
231 | break; |
232 | |
233 | case NETEVENT_DELAY_PROBE_TIME_UPDATE: |
234 | p = ptr; |
235 | |
236 | /* We check the device is present since we don't care about |
237 | * changes in the default table, we only care about changes |
238 | * done per device delay prob time parameter. |
239 | */ |
240 | #if IS_ENABLED(CONFIG_IPV6) |
241 | if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl)) |
242 | #else |
243 | if (!p->dev || p->tbl != &arp_tbl) |
244 | #endif |
245 | return NOTIFY_DONE; |
246 | |
247 | rcu_read_lock(); |
248 | list_for_each_entry_rcu(nhe, &neigh_update->neigh_list, |
249 | neigh_list) { |
250 | if (p->dev == READ_ONCE(nhe->neigh_dev)) { |
251 | found = true; |
252 | break; |
253 | } |
254 | } |
255 | rcu_read_unlock(); |
256 | if (!found) |
257 | return NOTIFY_DONE; |
258 | |
259 | neigh_update->min_interval = min_t(unsigned long, |
260 | NEIGH_VAR(p, DELAY_PROBE_TIME), |
261 | neigh_update->min_interval); |
262 | mlx5_fc_update_sampling_interval(dev: priv->mdev, |
263 | interval: neigh_update->min_interval); |
264 | break; |
265 | } |
266 | return NOTIFY_DONE; |
267 | } |
268 | |
/* Hash table of mlx5e_neigh_hash_entry keyed by the embedded struct
 * mlx5e_neigh (address family + destination IP).
 */
static const struct rhashtable_params mlx5e_neigh_ht_params = {
	.head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node),
	.key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh),
	.key_len = sizeof(struct mlx5e_neigh),
	.automatic_shrinking = true,
};
275 | |
276 | int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv) |
277 | { |
278 | struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; |
279 | int err; |
280 | |
281 | err = rhashtable_init(ht: &neigh_update->neigh_ht, params: &mlx5e_neigh_ht_params); |
282 | if (err) |
283 | goto out_err; |
284 | |
285 | INIT_LIST_HEAD(list: &neigh_update->neigh_list); |
286 | mutex_init(&neigh_update->encap_lock); |
287 | INIT_DELAYED_WORK(&neigh_update->neigh_stats_work, |
288 | mlx5e_rep_neigh_stats_work); |
289 | mlx5e_rep_neigh_update_init_interval(rpriv); |
290 | |
291 | neigh_update->netevent_nb.notifier_call = mlx5e_rep_netevent_event; |
292 | err = register_netevent_notifier(nb: &neigh_update->netevent_nb); |
293 | if (err) |
294 | goto out_notifier; |
295 | return 0; |
296 | |
297 | out_notifier: |
298 | neigh_update->netevent_nb.notifier_call = NULL; |
299 | rhashtable_destroy(ht: &neigh_update->neigh_ht); |
300 | out_err: |
301 | netdev_warn(dev: rpriv->netdev, |
302 | format: "Failed to initialize neighbours handling for vport %d\n" , |
303 | rpriv->rep->vport); |
304 | return err; |
305 | } |
306 | |
307 | void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv) |
308 | { |
309 | struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; |
310 | struct mlx5e_priv *priv = netdev_priv(dev: rpriv->netdev); |
311 | |
312 | if (!rpriv->neigh_update.netevent_nb.notifier_call) |
313 | return; |
314 | |
315 | unregister_netevent_notifier(nb: &neigh_update->netevent_nb); |
316 | |
317 | flush_workqueue(priv->wq); /* flush neigh update works */ |
318 | |
319 | cancel_delayed_work_sync(dwork: &rpriv->neigh_update.neigh_stats_work); |
320 | |
321 | mutex_destroy(lock: &neigh_update->encap_lock); |
322 | rhashtable_destroy(ht: &neigh_update->neigh_ht); |
323 | } |
324 | |
325 | static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv, |
326 | struct mlx5e_neigh_hash_entry *nhe) |
327 | { |
328 | struct mlx5e_rep_priv *rpriv = priv->ppriv; |
329 | int err; |
330 | |
331 | err = rhashtable_insert_fast(ht: &rpriv->neigh_update.neigh_ht, |
332 | obj: &nhe->rhash_node, |
333 | params: mlx5e_neigh_ht_params); |
334 | if (err) |
335 | return err; |
336 | |
337 | list_add_rcu(new: &nhe->neigh_list, head: &rpriv->neigh_update.neigh_list); |
338 | |
339 | return err; |
340 | } |
341 | |
342 | static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe) |
343 | { |
344 | struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv; |
345 | |
346 | mutex_lock(&rpriv->neigh_update.encap_lock); |
347 | |
348 | list_del_rcu(entry: &nhe->neigh_list); |
349 | |
350 | rhashtable_remove_fast(ht: &rpriv->neigh_update.neigh_ht, |
351 | obj: &nhe->rhash_node, |
352 | params: mlx5e_neigh_ht_params); |
353 | mutex_unlock(lock: &rpriv->neigh_update.encap_lock); |
354 | } |
355 | |
356 | /* This function must only be called under the representor's encap_lock or |
357 | * inside rcu read lock section. |
358 | */ |
359 | struct mlx5e_neigh_hash_entry * |
360 | mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, |
361 | struct mlx5e_neigh *m_neigh) |
362 | { |
363 | struct mlx5e_rep_priv *rpriv = priv->ppriv; |
364 | struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; |
365 | struct mlx5e_neigh_hash_entry *nhe; |
366 | |
367 | nhe = rhashtable_lookup_fast(ht: &neigh_update->neigh_ht, key: m_neigh, |
368 | params: mlx5e_neigh_ht_params); |
369 | return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL; |
370 | } |
371 | |
372 | int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, |
373 | struct mlx5e_neigh *m_neigh, |
374 | struct net_device *neigh_dev, |
375 | struct mlx5e_neigh_hash_entry **nhe) |
376 | { |
377 | int err; |
378 | |
379 | *nhe = kzalloc(size: sizeof(**nhe), GFP_KERNEL); |
380 | if (!*nhe) |
381 | return -ENOMEM; |
382 | |
383 | (*nhe)->priv = priv; |
384 | memcpy(&(*nhe)->m_neigh, m_neigh, sizeof(*m_neigh)); |
385 | spin_lock_init(&(*nhe)->encap_list_lock); |
386 | INIT_LIST_HEAD(list: &(*nhe)->encap_list); |
387 | refcount_set(r: &(*nhe)->refcnt, n: 1); |
388 | WRITE_ONCE((*nhe)->neigh_dev, neigh_dev); |
389 | |
390 | err = mlx5e_rep_neigh_entry_insert(priv, nhe: *nhe); |
391 | if (err) |
392 | goto out_free; |
393 | return 0; |
394 | |
395 | out_free: |
396 | kfree(objp: *nhe); |
397 | return err; |
398 | } |
399 | |