/*
 * Copyright (c) 2004 Topspin Communications. All rights reserved.
 * Copyright (c) 2005 Intel Corporation. All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/if_vlan.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/netdevice.h>
#include <net/addrconf.h>

#include <rdma/ib_cache.h>

#include "core_priv.h"

struct ib_pkey_cache {
	int		table_len;
	u16		table[] __counted_by(table_len);
};

struct ib_update_work {
	struct work_struct work;
	struct ib_event    event;
	bool enforce_security;
};

union ib_gid zgid;
EXPORT_SYMBOL(zgid);

enum gid_attr_find_mask {
	GID_ATTR_FIND_MASK_GID		= 1UL << 0,
	GID_ATTR_FIND_MASK_NETDEV	= 1UL << 1,
	GID_ATTR_FIND_MASK_DEFAULT	= 1UL << 2,
	GID_ATTR_FIND_MASK_GID_TYPE	= 1UL << 3,
};

enum gid_table_entry_state {
	GID_TABLE_ENTRY_INVALID		= 1,
	GID_TABLE_ENTRY_VALID		= 2,
	/*
	 * Indicates that the entry is pending removal; there may
	 * be active users of this GID entry.
	 * When the last user of the GID entry releases its reference,
	 * the GID entry is detached from the table.
	 */
	GID_TABLE_ENTRY_PENDING_DEL	= 3,
};

struct roce_gid_ndev_storage {
	struct rcu_head rcu_head;
	struct net_device *ndev;
};

struct ib_gid_table_entry {
	struct kref			kref;
	struct work_struct		del_work;
	struct ib_gid_attr		attr;
	void				*context;
	/* Store the ndev pointer so that the netdev reference can be
	 * released later from the call_rcu callback, because by that
	 * time the gid_table_entry and attr might already be freed.
	 * So keep a copy of it. ndev_storage is freed by the rcu
	 * callback.
	 */
	struct roce_gid_ndev_storage	*ndev_storage;
	enum gid_table_entry_state	state;
};

struct ib_gid_table {
	int				sz;
	/* In RoCE, adding a GID to the table requires:
	 * (a) Check whether this GID already exists.
	 * (b) Find a free space.
	 * (c) Write the new GID.
	 *
	 * Deletion requires a different set of operations:
	 * (a) Find the GID.
	 * (b) Delete it.
	 */
	/* Any writer to data_vec must hold this lock and the write side of
	 * rwlock. Readers must hold only rwlock. All writers must be in a
	 * sleepable context.
	 */
	struct mutex			lock;
	/* rwlock protects data_vec[ix]->state and the entry pointer. */
	rwlock_t			rwlock;
	struct ib_gid_table_entry	**data_vec;
	/* bit field, each bit indicates the index of default GID */
	u32				default_gid_indices;
};
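
/*
 * Illustrative sketch (hypothetical helper, not used in this file) of the
 * locking discipline documented above: a writer takes the sleepable mutex
 * for the whole update and the write side of rwlock only for the brief
 * pointer/state publication, while readers take only the read side.
 *
 *	static void example_publish(struct ib_gid_table *table, int ix,
 *				    struct ib_gid_table_entry *entry)
 *	{
 *		mutex_lock(&table->lock);	// sleepable section
 *		write_lock_irq(&table->rwlock);	// publish pointer/state
 *		table->data_vec[ix] = entry;
 *		write_unlock_irq(&table->rwlock);
 *		mutex_unlock(&table->lock);
 *	}
 */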

static void dispatch_gid_change_event(struct ib_device *ib_dev, u32 port)
{
	struct ib_event event;

	event.device = ib_dev;
	event.element.port_num = port;
	event.event = IB_EVENT_GID_CHANGE;

	ib_dispatch_event_clients(&event);
}

static const char * const gid_type_str[] = {
	/* IB/RoCE v1 value is set for IB_GID_TYPE_IB and IB_GID_TYPE_ROCE for
	 * user space compatibility reasons.
	 */
	[IB_GID_TYPE_IB]		= "IB/RoCE v1",
	[IB_GID_TYPE_ROCE]		= "IB/RoCE v1",
	[IB_GID_TYPE_ROCE_UDP_ENCAP]	= "RoCE v2",
};

const char *ib_cache_gid_type_str(enum ib_gid_type gid_type)
{
	if (gid_type < ARRAY_SIZE(gid_type_str) && gid_type_str[gid_type])
		return gid_type_str[gid_type];

	return "Invalid GID type";
}
EXPORT_SYMBOL(ib_cache_gid_type_str);

/** rdma_is_zero_gid - Check if given GID is zero or not.
 * @gid: GID to check
 * Returns true if given GID is zero, returns false otherwise.
 */
bool rdma_is_zero_gid(const union ib_gid *gid)
{
	return !memcmp(gid, &zgid, sizeof(*gid));
}
EXPORT_SYMBOL(rdma_is_zero_gid);

/** is_gid_index_default - Check if a given index belongs to
 * reserved default GIDs or not.
 * @table: GID table pointer
 * @index: Index to check in GID table
 * Returns true if index is one of the reserved default GID indices,
 * otherwise returns false.
 */
static bool is_gid_index_default(const struct ib_gid_table *table,
				 unsigned int index)
{
	return index < 32 && (BIT(index) & table->default_gid_indices);
}

int ib_cache_gid_parse_type_str(const char *buf)
{
	unsigned int i;
	size_t len;
	int err = -EINVAL;

	len = strlen(buf);
	if (len == 0)
		return -EINVAL;

	if (buf[len - 1] == '\n')
		len--;

	for (i = 0; i < ARRAY_SIZE(gid_type_str); ++i)
		if (gid_type_str[i] && !strncmp(buf, gid_type_str[i], len) &&
		    len == strlen(gid_type_str[i])) {
			err = i;
			break;
		}

	return err;
}
EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
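
/*
 * Example (hypothetical caller, for illustration only): parsing a
 * sysfs-style string, possibly newline terminated, into a GID type.
 *
 *	int gid_type = ib_cache_gid_parse_type_str("RoCE v2\n");
 *
 *	if (gid_type < 0)
 *		return gid_type;	// -EINVAL on an unknown string
 *	// gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP here
 */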

static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u32 port)
{
	return device->port_data[port].cache.gid;
}

static bool is_gid_entry_free(const struct ib_gid_table_entry *entry)
{
	return !entry;
}

static bool is_gid_entry_valid(const struct ib_gid_table_entry *entry)
{
	return entry && entry->state == GID_TABLE_ENTRY_VALID;
}

static void schedule_free_gid(struct kref *kref)
{
	struct ib_gid_table_entry *entry =
		container_of(kref, struct ib_gid_table_entry, kref);

	queue_work(ib_wq, &entry->del_work);
}

static void put_gid_ndev(struct rcu_head *head)
{
	struct roce_gid_ndev_storage *storage =
		container_of(head, struct roce_gid_ndev_storage, rcu_head);

	WARN_ON(!storage->ndev);
	/* At this point it's safe to release the netdev reference,
	 * as all callers working on gid_attr->ndev are done
	 * using this netdev.
	 */
	dev_put(storage->ndev);
	kfree(storage);
}

static void free_gid_entry_locked(struct ib_gid_table_entry *entry)
{
	struct ib_device *device = entry->attr.device;
	u32 port_num = entry->attr.port_num;
	struct ib_gid_table *table = rdma_gid_table(device, port_num);

	dev_dbg(&device->dev, "%s port=%u index=%u gid %pI6\n", __func__,
		port_num, entry->attr.index, entry->attr.gid.raw);

	write_lock_irq(&table->rwlock);

	/*
	 * The only way to avoid overwriting NULL in the table is
	 * to check whether it is the same entry in the table.
	 * If a new entry was added to this index by the time we free
	 * here, don't overwrite the new table entry.
	 */
	if (entry == table->data_vec[entry->attr.index])
		table->data_vec[entry->attr.index] = NULL;
	/* Now this index is ready to be allocated */
	write_unlock_irq(&table->rwlock);

	if (entry->ndev_storage)
		call_rcu(&entry->ndev_storage->rcu_head, put_gid_ndev);
	kfree(entry);
}

static void free_gid_entry(struct kref *kref)
{
	struct ib_gid_table_entry *entry =
		container_of(kref, struct ib_gid_table_entry, kref);

	free_gid_entry_locked(entry);
}

/**
 * free_gid_work - Release reference to the GID entry
 * @work: Work structure referring to the GID entry which needs to be
 * deleted.
 *
 * free_gid_work() frees the entry from the HCA's hardware table
 * if the provider supports it. It also releases the netdevice reference.
 */
static void free_gid_work(struct work_struct *work)
{
	struct ib_gid_table_entry *entry =
		container_of(work, struct ib_gid_table_entry, del_work);
	struct ib_device *device = entry->attr.device;
	u32 port_num = entry->attr.port_num;
	struct ib_gid_table *table = rdma_gid_table(device, port_num);

	mutex_lock(&table->lock);
	free_gid_entry_locked(entry);
	mutex_unlock(&table->lock);
}

static struct ib_gid_table_entry *
alloc_gid_entry(const struct ib_gid_attr *attr)
{
	struct ib_gid_table_entry *entry;
	struct net_device *ndev;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return NULL;

	ndev = rcu_dereference_protected(attr->ndev, 1);
	if (ndev) {
		entry->ndev_storage = kzalloc(sizeof(*entry->ndev_storage),
					      GFP_KERNEL);
		if (!entry->ndev_storage) {
			kfree(entry);
			return NULL;
		}
		dev_hold(ndev);
		entry->ndev_storage->ndev = ndev;
	}
	kref_init(&entry->kref);
	memcpy(&entry->attr, attr, sizeof(*attr));
	INIT_WORK(&entry->del_work, free_gid_work);
	entry->state = GID_TABLE_ENTRY_INVALID;
	return entry;
}

static void store_gid_entry(struct ib_gid_table *table,
			    struct ib_gid_table_entry *entry)
{
	entry->state = GID_TABLE_ENTRY_VALID;

	dev_dbg(&entry->attr.device->dev, "%s port=%u index=%u gid %pI6\n",
		__func__, entry->attr.port_num, entry->attr.index,
		entry->attr.gid.raw);

	lockdep_assert_held(&table->lock);
	write_lock_irq(&table->rwlock);
	table->data_vec[entry->attr.index] = entry;
	write_unlock_irq(&table->rwlock);
}

static void get_gid_entry(struct ib_gid_table_entry *entry)
{
	kref_get(&entry->kref);
}

static void put_gid_entry(struct ib_gid_table_entry *entry)
{
	kref_put(&entry->kref, schedule_free_gid);
}

static void put_gid_entry_locked(struct ib_gid_table_entry *entry)
{
	kref_put(&entry->kref, free_gid_entry);
}
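
/*
 * Reference-count sketch (illustration, assuming the caller already owns a
 * reference): put_gid_entry() defers the final free to the work queue via
 * schedule_free_gid() and so is usable from atomic context, whereas
 * put_gid_entry_locked() frees inline and therefore expects table->lock to
 * already be held.
 *
 *	get_gid_entry(entry);		// kref 1 -> 2
 *	put_gid_entry(entry);		// kref 2 -> 1, no free
 *	mutex_lock(&table->lock);
 *	put_gid_entry_locked(entry);	// kref 1 -> 0, freed inline
 *	mutex_unlock(&table->lock);
 */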

static int add_roce_gid(struct ib_gid_table_entry *entry)
{
	const struct ib_gid_attr *attr = &entry->attr;
	int ret;

	if (!attr->ndev) {
		dev_err(&attr->device->dev, "%s NULL netdev port=%u index=%u\n",
			__func__, attr->port_num, attr->index);
		return -EINVAL;
	}
	if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) {
		ret = attr->device->ops.add_gid(attr, &entry->context);
		if (ret) {
			dev_err(&attr->device->dev,
				"%s GID add failed port=%u index=%u\n",
				__func__, attr->port_num, attr->index);
			return ret;
		}
	}
	return 0;
}

/**
 * del_gid - Delete GID table entry
 *
 * @ib_dev: IB device whose GID entry is to be deleted
 * @port: Port number of the IB device
 * @table: GID table of the IB device for a port
 * @ix: GID entry index to delete
 *
 */
static void del_gid(struct ib_device *ib_dev, u32 port,
		    struct ib_gid_table *table, int ix)
{
	struct roce_gid_ndev_storage *ndev_storage;
	struct ib_gid_table_entry *entry;

	lockdep_assert_held(&table->lock);

	dev_dbg(&ib_dev->dev, "%s port=%u index=%d gid %pI6\n", __func__, port,
		ix, table->data_vec[ix]->attr.gid.raw);

	write_lock_irq(&table->rwlock);
	entry = table->data_vec[ix];
	entry->state = GID_TABLE_ENTRY_PENDING_DEL;
	/*
	 * For non-RoCE protocols, the GID entry slot is immediately ready
	 * to be reused.
	 */
	if (!rdma_protocol_roce(ib_dev, port))
		table->data_vec[ix] = NULL;
	write_unlock_irq(&table->rwlock);

	if (rdma_cap_roce_gid_table(ib_dev, port))
		ib_dev->ops.del_gid(&entry->attr, &entry->context);

	ndev_storage = entry->ndev_storage;
	if (ndev_storage) {
		entry->ndev_storage = NULL;
		rcu_assign_pointer(entry->attr.ndev, NULL);
		call_rcu(&ndev_storage->rcu_head, put_gid_ndev);
	}

	put_gid_entry_locked(entry);
}

/**
 * add_modify_gid - Add or modify GID table entry
 *
 * @table: GID table in which GID to be added or modified
 * @attr: Attributes of the GID
 *
 * Returns 0 on success or an appropriate error code. It accepts zero
 * GID addition for non-RoCE ports on HCAs that report them as valid
 * GIDs. However, such zero GIDs are not added to the cache.
 */
static int add_modify_gid(struct ib_gid_table *table,
			  const struct ib_gid_attr *attr)
{
	struct ib_gid_table_entry *entry;
	int ret = 0;

	/*
	 * Invalidate any old entry in the table to make it safe to write to
	 * this index.
	 */
	if (is_gid_entry_valid(table->data_vec[attr->index]))
		del_gid(attr->device, attr->port_num, table, attr->index);

	/*
	 * Some HCAs report multiple GID entries with only one valid GID, and
	 * leave other unused entries as the zero GID. Convert zero GIDs to
	 * empty table entries instead of storing them.
	 */
	if (rdma_is_zero_gid(&attr->gid))
		return 0;

	entry = alloc_gid_entry(attr);
	if (!entry)
		return -ENOMEM;

	if (rdma_protocol_roce(attr->device, attr->port_num)) {
		ret = add_roce_gid(entry);
		if (ret)
			goto done;
	}

	store_gid_entry(table, entry);
	return 0;

done:
	put_gid_entry(entry);
	return ret;
}

/* rwlock should be read locked, or lock should be held */
static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
		    const struct ib_gid_attr *val, bool default_gid,
		    unsigned long mask, int *pempty)
{
	int i = 0;
	int found = -1;
	int empty = pempty ? -1 : 0;

	while (i < table->sz && (found < 0 || empty < 0)) {
		struct ib_gid_table_entry *data = table->data_vec[i];
		struct ib_gid_attr *attr;
		int curr_index = i;

		i++;

		/* find_gid() is used during GID addition, where it is
		 * expected to return a free entry slot which is not a
		 * duplicate. A free entry slot is requested and returned
		 * only if pempty is set, so look up a free slot only when
		 * requested.
		 */
		if (pempty && empty < 0) {
			if (is_gid_entry_free(data) &&
			    default_gid ==
				is_gid_index_default(table, curr_index)) {
				/*
				 * Found an invalid (free) entry; allocate it.
				 * If default GID is requested, then our
				 * found slot must be one of the DEFAULT
				 * reserved slots or we fail.
				 * This ensures that only DEFAULT reserved
				 * slots are used for default property GIDs.
				 */
				empty = curr_index;
			}
		}

		/*
		 * Additionally, find_gid() is used to find a valid entry
		 * during lookup; so ignore the entries which are marked as
		 * pending removal and the entries which are marked as
		 * invalid.
		 */
		if (!is_gid_entry_valid(data))
			continue;

		if (found >= 0)
			continue;

		attr = &data->attr;
		if (mask & GID_ATTR_FIND_MASK_GID_TYPE &&
		    attr->gid_type != val->gid_type)
			continue;

		if (mask & GID_ATTR_FIND_MASK_GID &&
		    memcmp(gid, &data->attr.gid, sizeof(*gid)))
			continue;

		if (mask & GID_ATTR_FIND_MASK_NETDEV &&
		    attr->ndev != val->ndev)
			continue;

		if (mask & GID_ATTR_FIND_MASK_DEFAULT &&
		    is_gid_index_default(table, curr_index) != default_gid)
			continue;

		found = curr_index;
	}

	if (pempty)
		*pempty = empty;

	return found;
}

static void make_default_gid(struct net_device *dev, union ib_gid *gid)
{
	gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
	addrconf_ifid_eui48(&gid->raw[8], dev);
}

static int __ib_cache_gid_add(struct ib_device *ib_dev, u32 port,
			      union ib_gid *gid, struct ib_gid_attr *attr,
			      unsigned long mask, bool default_gid)
{
	struct ib_gid_table *table;
	int ret = 0;
	int empty;
	int ix;

	/* Do not allow adding zero GIDs, per IB spec version 1.3,
	 * section 4.1.1 point (6), section 12.7.10 and section 12.7.20.
	 */
	if (rdma_is_zero_gid(gid))
		return -EINVAL;

	table = rdma_gid_table(ib_dev, port);

	mutex_lock(&table->lock);

	ix = find_gid(table, gid, attr, default_gid, mask, &empty);
	if (ix >= 0)
		goto out_unlock;

	if (empty < 0) {
		ret = -ENOSPC;
		goto out_unlock;
	}
	attr->device = ib_dev;
	attr->index = empty;
	attr->port_num = port;
	attr->gid = *gid;
	ret = add_modify_gid(table, attr);
	if (!ret)
		dispatch_gid_change_event(ib_dev, port);

out_unlock:
	mutex_unlock(&table->lock);
	if (ret)
		pr_warn("%s: unable to add gid %pI6 error=%d\n",
			__func__, gid->raw, ret);
	return ret;
}

int ib_cache_gid_add(struct ib_device *ib_dev, u32 port,
		     union ib_gid *gid, struct ib_gid_attr *attr)
{
	unsigned long mask = GID_ATTR_FIND_MASK_GID |
			     GID_ATTR_FIND_MASK_GID_TYPE |
			     GID_ATTR_FIND_MASK_NETDEV;

	return __ib_cache_gid_add(ib_dev, port, gid, attr, mask, false);
}
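
/*
 * Illustrative sketch of adding a RoCE GID (the values and the ndev
 * variable are made up): the add is idempotent, since find_gid() returns
 * early when a matching gid/gid_type/ndev triplet already exists.
 *
 *	struct ib_gid_attr attr = {};
 *
 *	attr.gid_type = IB_GID_TYPE_ROCE_UDP_ENCAP;
 *	RCU_INIT_POINTER(attr.ndev, ndev);	// netdev backing this GID
 *	ret = ib_cache_gid_add(ib_dev, port, &gid, &attr);
 */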

static int
_ib_cache_gid_del(struct ib_device *ib_dev, u32 port,
		  union ib_gid *gid, struct ib_gid_attr *attr,
		  unsigned long mask, bool default_gid)
{
	struct ib_gid_table *table;
	int ret = 0;
	int ix;

	table = rdma_gid_table(ib_dev, port);

	mutex_lock(&table->lock);

	ix = find_gid(table, gid, attr, default_gid, mask, NULL);
	if (ix < 0) {
		ret = -EINVAL;
		goto out_unlock;
	}

	del_gid(ib_dev, port, table, ix);
	dispatch_gid_change_event(ib_dev, port);

out_unlock:
	mutex_unlock(&table->lock);
	if (ret)
		pr_debug("%s: can't delete gid %pI6 error=%d\n",
			 __func__, gid->raw, ret);
	return ret;
}

int ib_cache_gid_del(struct ib_device *ib_dev, u32 port,
		     union ib_gid *gid, struct ib_gid_attr *attr)
{
	unsigned long mask = GID_ATTR_FIND_MASK_GID |
			     GID_ATTR_FIND_MASK_GID_TYPE |
			     GID_ATTR_FIND_MASK_DEFAULT |
			     GID_ATTR_FIND_MASK_NETDEV;

	return _ib_cache_gid_del(ib_dev, port, gid, attr, mask, false);
}

int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u32 port,
				     struct net_device *ndev)
{
	struct ib_gid_table *table;
	int ix;
	bool deleted = false;

	table = rdma_gid_table(ib_dev, port);

	mutex_lock(&table->lock);

	for (ix = 0; ix < table->sz; ix++) {
		if (is_gid_entry_valid(table->data_vec[ix]) &&
		    table->data_vec[ix]->attr.ndev == ndev) {
			del_gid(ib_dev, port, table, ix);
			deleted = true;
		}
	}

	mutex_unlock(&table->lock);

	if (deleted)
		dispatch_gid_change_event(ib_dev, port);

	return 0;
}

/**
 * rdma_find_gid_by_port - Returns the GID entry attributes when it finds
 * a valid GID entry for given search parameters. It searches for the specified
 * GID value in the local software cache.
 * @ib_dev: The device to query.
 * @gid: The GID value to search for.
 * @gid_type: The GID type to search for.
 * @port: The port number of the device where the GID value should be searched.
 * @ndev: In RoCE, the net device of the device. NULL means ignore.
 *
 * Returns the sgid attributes with a valid reference held if the GID is
 * found, or an ERR_PTR on error.
 * The caller must invoke rdma_put_gid_attr() to release the reference.
 */
const struct ib_gid_attr *
rdma_find_gid_by_port(struct ib_device *ib_dev,
		      const union ib_gid *gid,
		      enum ib_gid_type gid_type,
		      u32 port, struct net_device *ndev)
{
	int local_index;
	struct ib_gid_table *table;
	unsigned long mask = GID_ATTR_FIND_MASK_GID |
			     GID_ATTR_FIND_MASK_GID_TYPE;
	struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type};
	const struct ib_gid_attr *attr;
	unsigned long flags;

	if (!rdma_is_port_valid(ib_dev, port))
		return ERR_PTR(-ENOENT);

	table = rdma_gid_table(ib_dev, port);

	if (ndev)
		mask |= GID_ATTR_FIND_MASK_NETDEV;

	read_lock_irqsave(&table->rwlock, flags);
	local_index = find_gid(table, gid, &val, false, mask, NULL);
	if (local_index >= 0) {
		get_gid_entry(table->data_vec[local_index]);
		attr = &table->data_vec[local_index]->attr;
		read_unlock_irqrestore(&table->rwlock, flags);
		return attr;
	}

	read_unlock_irqrestore(&table->rwlock, flags);
	return ERR_PTR(-ENOENT);
}
EXPORT_SYMBOL(rdma_find_gid_by_port);
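
/*
 * Example usage (hypothetical caller): every successful lookup returns an
 * attribute with an elevated reference that must be dropped with
 * rdma_put_gid_attr().
 *
 *	const struct ib_gid_attr *attr;
 *
 *	attr = rdma_find_gid_by_port(ib_dev, &gid, IB_GID_TYPE_ROCE_UDP_ENCAP,
 *				     port, NULL);
 *	if (IS_ERR(attr))
 *		return PTR_ERR(attr);
 *	// ... use attr ...
 *	rdma_put_gid_attr(attr);
 */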

/**
 * rdma_find_gid_by_filter - Returns the GID table attribute where a
 * specified GID value occurs
 * @ib_dev: The device to query.
 * @gid: The GID value to search for.
 * @port: The port number of the device where the GID value could be
 *   searched.
 * @filter: The filter function is executed on any matching GID in the table.
 *   If the filter function returns true, the corresponding index is returned,
 *   otherwise, we continue searching the GID table. It's guaranteed that
 *   while filter is executed, ndev field is valid and the structure won't
 *   change. filter is executed in an atomic context. filter must not be NULL.
 * @context: Private data to pass into the call-back.
 *
 * rdma_find_gid_by_filter() searches for the specified GID value
 * of which the filter function returns true in the port's GID table.
 *
 */
const struct ib_gid_attr *rdma_find_gid_by_filter(
	struct ib_device *ib_dev, const union ib_gid *gid, u32 port,
	bool (*filter)(const union ib_gid *gid, const struct ib_gid_attr *,
		       void *),
	void *context)
{
	const struct ib_gid_attr *res = ERR_PTR(-ENOENT);
	struct ib_gid_table *table;
	unsigned long flags;
	unsigned int i;

	if (!rdma_is_port_valid(ib_dev, port))
		return ERR_PTR(-EINVAL);

	table = rdma_gid_table(ib_dev, port);

	read_lock_irqsave(&table->rwlock, flags);
	for (i = 0; i < table->sz; i++) {
		struct ib_gid_table_entry *entry = table->data_vec[i];

		if (!is_gid_entry_valid(entry))
			continue;

		if (memcmp(gid, &entry->attr.gid, sizeof(*gid)))
			continue;

		if (filter(gid, &entry->attr, context)) {
			get_gid_entry(entry);
			res = &entry->attr;
			break;
		}
	}
	read_unlock_irqrestore(&table->rwlock, flags);
	return res;
}
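
/*
 * Sketch of a filter callback (hypothetical, for illustration): filters run
 * under the table rwlock in atomic context, so they must not sleep.
 *
 *	static bool match_gid_type(const union ib_gid *gid,
 *				   const struct ib_gid_attr *attr,
 *				   void *context)
 *	{
 *		enum ib_gid_type *want = context;
 *
 *		return attr->gid_type == *want;	// no sleeping here
 *	}
 */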

static struct ib_gid_table *alloc_gid_table(int sz)
{
	struct ib_gid_table *table = kzalloc(sizeof(*table), GFP_KERNEL);

	if (!table)
		return NULL;

	table->data_vec = kcalloc(sz, sizeof(*table->data_vec), GFP_KERNEL);
	if (!table->data_vec)
		goto err_free_table;

	mutex_init(&table->lock);

	table->sz = sz;
	rwlock_init(&table->rwlock);
	return table;

err_free_table:
	kfree(table);
	return NULL;
}

static void release_gid_table(struct ib_device *device,
			      struct ib_gid_table *table)
{
	bool leak = false;
	int i;

	if (!table)
		return;

	for (i = 0; i < table->sz; i++) {
		if (is_gid_entry_free(table->data_vec[i]))
			continue;
		if (kref_read(&table->data_vec[i]->kref) > 1) {
			dev_err(&device->dev,
				"GID entry ref leak for index %d ref=%u\n", i,
				kref_read(&table->data_vec[i]->kref));
			leak = true;
		}
	}
	if (leak)
		return;

	mutex_destroy(&table->lock);
	kfree(table->data_vec);
	kfree(table);
}

static void cleanup_gid_table_port(struct ib_device *ib_dev, u32 port,
				   struct ib_gid_table *table)
{
	int i;

	if (!table)
		return;

	mutex_lock(&table->lock);
	for (i = 0; i < table->sz; ++i) {
		if (is_gid_entry_valid(table->data_vec[i]))
			del_gid(ib_dev, port, table, i);
	}
	mutex_unlock(&table->lock);
}

void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u32 port,
				  struct net_device *ndev,
				  unsigned long gid_type_mask,
				  enum ib_cache_gid_default_mode mode)
{
	union ib_gid gid = { };
	struct ib_gid_attr gid_attr;
	unsigned int gid_type;
	unsigned long mask;

	mask = GID_ATTR_FIND_MASK_GID_TYPE |
	       GID_ATTR_FIND_MASK_DEFAULT |
	       GID_ATTR_FIND_MASK_NETDEV;
	memset(&gid_attr, 0, sizeof(gid_attr));
	gid_attr.ndev = ndev;

	for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) {
		if (1UL << gid_type & ~gid_type_mask)
			continue;

		gid_attr.gid_type = gid_type;

		if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) {
			make_default_gid(ndev, &gid);
			__ib_cache_gid_add(ib_dev, port, &gid,
					   &gid_attr, mask, true);
		} else if (mode == IB_CACHE_GID_DEFAULT_MODE_DELETE) {
			_ib_cache_gid_del(ib_dev, port, &gid,
					  &gid_attr, mask, true);
		}
	}
}

static void gid_table_reserve_default(struct ib_device *ib_dev, u32 port,
				      struct ib_gid_table *table)
{
	unsigned int i;
	unsigned long roce_gid_type_mask;
	unsigned int num_default_gids;

	roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
	num_default_gids = hweight_long(roce_gid_type_mask);
	/* Reserve starting indices for default GIDs */
	for (i = 0; i < num_default_gids && i < table->sz; i++)
		table->default_gid_indices |= BIT(i);
}

static void gid_table_release_one(struct ib_device *ib_dev)
{
	u32 p;

	rdma_for_each_port (ib_dev, p) {
		release_gid_table(ib_dev, ib_dev->port_data[p].cache.gid);
		ib_dev->port_data[p].cache.gid = NULL;
	}
}

static int _gid_table_setup_one(struct ib_device *ib_dev)
{
	struct ib_gid_table *table;
	u32 rdma_port;

	rdma_for_each_port (ib_dev, rdma_port) {
		table = alloc_gid_table(
			ib_dev->port_data[rdma_port].immutable.gid_tbl_len);
		if (!table)
			goto rollback_table_setup;

		gid_table_reserve_default(ib_dev, rdma_port, table);
		ib_dev->port_data[rdma_port].cache.gid = table;
	}
	return 0;

rollback_table_setup:
	gid_table_release_one(ib_dev);
	return -ENOMEM;
}

static void gid_table_cleanup_one(struct ib_device *ib_dev)
{
	u32 p;

	rdma_for_each_port (ib_dev, p)
		cleanup_gid_table_port(ib_dev, p,
				       ib_dev->port_data[p].cache.gid);
}

static int gid_table_setup_one(struct ib_device *ib_dev)
{
	int err;

	err = _gid_table_setup_one(ib_dev);

	if (err)
		return err;

	rdma_roce_rescan_device(ib_dev);

	return err;
}

/**
 * rdma_query_gid - Read the GID content from the GID software cache
 * @device: Device to query the GID
 * @port_num: Port number of the device
 * @index: Index of the GID table entry to read
 * @gid: Pointer to GID where to store the entry's GID
 *
 * rdma_query_gid() only reads the GID entry content for the requested
 * device, port and index. It works for IB, RoCE and iWARP link layers.
 * It doesn't hold any reference to the GID table entry in the HCA or
 * software cache.
 *
 * Returns 0 on success or an appropriate error code.
 *
 */
int rdma_query_gid(struct ib_device *device, u32 port_num,
		   int index, union ib_gid *gid)
{
	struct ib_gid_table *table;
	unsigned long flags;
	int res;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	table = rdma_gid_table(device, port_num);
	read_lock_irqsave(&table->rwlock, flags);

	if (index < 0 || index >= table->sz) {
		res = -EINVAL;
		goto done;
	}

	if (!is_gid_entry_valid(table->data_vec[index])) {
		res = -ENOENT;
		goto done;
	}

	memcpy(gid, &table->data_vec[index]->attr.gid, sizeof(*gid));
	res = 0;

done:
	read_unlock_irqrestore(&table->rwlock, flags);
	return res;
}
EXPORT_SYMBOL(rdma_query_gid);
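
/*
 * Example (hypothetical caller): reading GID 0 of port 1. No reference is
 * held afterwards; the GID content is simply copied out.
 *
 *	union ib_gid gid;
 *
 *	if (!rdma_query_gid(device, 1, 0, &gid))
 *		pr_info("port GID %pI6\n", gid.raw);
 */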

/**
 * rdma_read_gid_hw_context - Read the HW GID context from GID attribute
 * @attr: Pointer to the GID attribute
 *
 * rdma_read_gid_hw_context() reads the driver's GID HW context corresponding
 * to the SGID attr. Callers are required to already be holding the reference
 * to an existing GID entry.
 *
 * Returns the HW GID context
 *
 */
void *rdma_read_gid_hw_context(const struct ib_gid_attr *attr)
{
	return container_of(attr, struct ib_gid_table_entry, attr)->context;
}
EXPORT_SYMBOL(rdma_read_gid_hw_context);

/**
 * rdma_find_gid - Returns SGID attributes if the matching GID is found.
 * @device: The device to query.
 * @gid: The GID value to search for.
 * @gid_type: The GID type to search for.
 * @ndev: In RoCE, the net device of the device. NULL means ignore.
 *
 * rdma_find_gid() searches for the specified GID value in the software cache.
 *
 * Returns GID attributes if a valid GID is found or returns ERR_PTR for the
 * error. The caller must invoke rdma_put_gid_attr() to release the reference.
 *
 */
const struct ib_gid_attr *rdma_find_gid(struct ib_device *device,
					const union ib_gid *gid,
					enum ib_gid_type gid_type,
					struct net_device *ndev)
{
	unsigned long mask = GID_ATTR_FIND_MASK_GID |
			     GID_ATTR_FIND_MASK_GID_TYPE;
	struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
	u32 p;

	if (ndev)
		mask |= GID_ATTR_FIND_MASK_NETDEV;

	rdma_for_each_port(device, p) {
		struct ib_gid_table *table;
		unsigned long flags;
		int index;

		table = device->port_data[p].cache.gid;
		read_lock_irqsave(&table->rwlock, flags);
		index = find_gid(table, gid, &gid_attr_val, false, mask, NULL);
		if (index >= 0) {
			const struct ib_gid_attr *attr;

			get_gid_entry(table->data_vec[index]);
			attr = &table->data_vec[index]->attr;
			read_unlock_irqrestore(&table->rwlock, flags);
			return attr;
		}
		read_unlock_irqrestore(&table->rwlock, flags);
	}

	return ERR_PTR(-ENOENT);
}
EXPORT_SYMBOL(rdma_find_gid);

int ib_get_cached_pkey(struct ib_device *device,
		       u32 port_num,
		       int index,
		       u16 *pkey)
{
	struct ib_pkey_cache *cache;
	unsigned long flags;
	int ret = 0;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	read_lock_irqsave(&device->cache_lock, flags);

	cache = device->port_data[port_num].cache.pkey;

	if (!cache || index < 0 || index >= cache->table_len)
		ret = -EINVAL;
	else
		*pkey = cache->table[index];

	read_unlock_irqrestore(&device->cache_lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_get_cached_pkey);

void ib_get_cached_subnet_prefix(struct ib_device *device, u32 port_num,
				 u64 *sn_pfx)
{
	unsigned long flags;

	read_lock_irqsave(&device->cache_lock, flags);
	*sn_pfx = device->port_data[port_num].cache.subnet_prefix;
	read_unlock_irqrestore(&device->cache_lock, flags);
}
EXPORT_SYMBOL(ib_get_cached_subnet_prefix);

int ib_find_cached_pkey(struct ib_device *device, u32 port_num,
			u16 pkey, u16 *index)
{
	struct ib_pkey_cache *cache;
	unsigned long flags;
	int i;
	int ret = -ENOENT;
	int partial_ix = -1;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	read_lock_irqsave(&device->cache_lock, flags);

	cache = device->port_data[port_num].cache.pkey;
	if (!cache) {
		ret = -EINVAL;
		goto err;
	}

	*index = -1;

	for (i = 0; i < cache->table_len; ++i)
		if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) {
			if (cache->table[i] & 0x8000) {
				*index = i;
				ret = 0;
				break;
			} else {
				partial_ix = i;
			}
		}

	if (ret && partial_ix >= 0) {
		*index = partial_ix;
		ret = 0;
	}

err:
	read_unlock_irqrestore(&device->cache_lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_find_cached_pkey);
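
/*
 * Note on the matching above: bit 15 of a P_Key is the membership bit
 * (1 = full member, 0 = partial member), so the search compares only the
 * low 15 bits and prefers a full-membership match, falling back to a
 * partial-membership index. Minimal sketch (made-up values):
 *
 *	u16 index;
 *
 *	if (!ib_find_cached_pkey(device, 1, 0xffff, &index))
 *		pr_info("default pkey at index %u\n", index);
 */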

int ib_find_exact_cached_pkey(struct ib_device *device, u32 port_num,
			      u16 pkey, u16 *index)
{
	struct ib_pkey_cache *cache;
	unsigned long flags;
	int i;
	int ret = -ENOENT;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	read_lock_irqsave(&device->cache_lock, flags);

	cache = device->port_data[port_num].cache.pkey;
	if (!cache) {
		ret = -EINVAL;
		goto err;
	}

	*index = -1;

	for (i = 0; i < cache->table_len; ++i)
		if (cache->table[i] == pkey) {
			*index = i;
			ret = 0;
			break;
		}

err:
	read_unlock_irqrestore(&device->cache_lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_find_exact_cached_pkey);

int ib_get_cached_lmc(struct ib_device *device, u32 port_num, u8 *lmc)
{
	unsigned long flags;
	int ret = 0;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	read_lock_irqsave(&device->cache_lock, flags);
	*lmc = device->port_data[port_num].cache.lmc;
	read_unlock_irqrestore(&device->cache_lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_get_cached_lmc);

int ib_get_cached_port_state(struct ib_device *device, u32 port_num,
			     enum ib_port_state *port_state)
{
	unsigned long flags;
	int ret = 0;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	read_lock_irqsave(&device->cache_lock, flags);
	*port_state = device->port_data[port_num].cache.port_state;
	read_unlock_irqrestore(&device->cache_lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_get_cached_port_state);

/**
 * rdma_get_gid_attr - Returns GID attributes for a port of a device
 * at a requested gid_index, if a valid GID entry exists.
 * @device: The device to query.
 * @port_num: The port number on the device where the GID value
 *   is to be queried.
 * @index: Index of the GID table entry whose attributes are to
 *   be queried.
 *
 * rdma_get_gid_attr() acquires a reference to the gid attributes from the
 * cached GID table. The caller must invoke rdma_put_gid_attr() to release
 * the reference to the gid attribute, regardless of link layer.
 *
 * Returns pointer to valid gid attribute or ERR_PTR for the appropriate error
 * code.
 */
const struct ib_gid_attr *
rdma_get_gid_attr(struct ib_device *device, u32 port_num, int index)
{
	const struct ib_gid_attr *attr = ERR_PTR(-ENODATA);
	struct ib_gid_table *table;
	unsigned long flags;

	if (!rdma_is_port_valid(device, port_num))
		return ERR_PTR(-EINVAL);

	table = rdma_gid_table(device, port_num);
	if (index < 0 || index >= table->sz)
		return ERR_PTR(-EINVAL);

	read_lock_irqsave(&table->rwlock, flags);
	if (!is_gid_entry_valid(table->data_vec[index]))
		goto done;

	get_gid_entry(table->data_vec[index]);
	attr = &table->data_vec[index]->attr;
done:
	read_unlock_irqrestore(&table->rwlock, flags);
	return attr;
}
EXPORT_SYMBOL(rdma_get_gid_attr);

/**
 * rdma_query_gid_table - Reads GID table entries of all the ports of a device up to max_entries.
 * @device: The device to query.
 * @entries: Entries where GID entries are returned.
 * @max_entries: Maximum number of entries that can be returned.
 *   The entries array must be allocated to hold max_entries number of entries.
 *
 * Returns number of entries on success or an appropriate error code.
 */
ssize_t rdma_query_gid_table(struct ib_device *device,
			     struct ib_uverbs_gid_entry *entries,
			     size_t max_entries)
{
	const struct ib_gid_attr *gid_attr;
	ssize_t num_entries = 0, ret;
	struct ib_gid_table *table;
	u32 port_num, i;
	struct net_device *ndev;
	unsigned long flags;

	rdma_for_each_port(device, port_num) {
		table = rdma_gid_table(device, port_num);
		read_lock_irqsave(&table->rwlock, flags);
		for (i = 0; i < table->sz; i++) {
			if (!is_gid_entry_valid(table->data_vec[i]))
				continue;
			if (num_entries >= max_entries) {
				ret = -EINVAL;
				goto err;
			}

			gid_attr = &table->data_vec[i]->attr;

			memcpy(&entries->gid, &gid_attr->gid,
			       sizeof(gid_attr->gid));
			entries->gid_index = gid_attr->index;
			entries->port_num = gid_attr->port_num;
			entries->gid_type = gid_attr->gid_type;
			ndev = rcu_dereference_protected(
				gid_attr->ndev,
				lockdep_is_held(&table->rwlock));
			if (ndev)
				entries->netdev_ifindex = ndev->ifindex;

			num_entries++;
			entries++;
		}
		read_unlock_irqrestore(&table->rwlock, flags);
	}

	return num_entries;
err:
	read_unlock_irqrestore(&table->rwlock, flags);
	return ret;
}
EXPORT_SYMBOL(rdma_query_gid_table);
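
/*
 * Example (hypothetical caller): snapshotting all valid GID entries. The
 * entries array must be sized by the caller; -EINVAL is returned when it
 * is too small.
 *
 *	struct ib_uverbs_gid_entry entries[32];
 *	ssize_t n;
 *
 *	n = rdma_query_gid_table(device, entries, ARRAY_SIZE(entries));
 *	if (n < 0)
 *		return n;
 *	// entries[0..n-1] are valid snapshots
 */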

/**
 * rdma_put_gid_attr - Release reference to the GID attribute
 * @attr: Pointer to the GID attribute whose reference
 *   needs to be released.
 *
 * rdma_put_gid_attr() must be used to release a reference that was
 * acquired using rdma_get_gid_attr() or any API which returns a pointer
 * to the ib_gid_attr, regardless of whether the link layer is IB or RoCE.
 *
 */
void rdma_put_gid_attr(const struct ib_gid_attr *attr)
{
	struct ib_gid_table_entry *entry =
		container_of(attr, struct ib_gid_table_entry, attr);

	put_gid_entry(entry);
}
EXPORT_SYMBOL(rdma_put_gid_attr);

/**
 * rdma_hold_gid_attr - Get reference to existing GID attribute
 *
 * @attr: Pointer to the GID attribute whose reference
 *   needs to be taken.
 *
 * Increase the reference count to a GID attribute to keep it from being
 * freed. Callers are required to already be holding a reference to the
 * attribute.
 *
 */
void rdma_hold_gid_attr(const struct ib_gid_attr *attr)
{
	struct ib_gid_table_entry *entry =
		container_of(attr, struct ib_gid_table_entry, attr);

	get_gid_entry(entry);
}
EXPORT_SYMBOL(rdma_hold_gid_attr);

/**
 * rdma_read_gid_attr_ndev_rcu - Read GID attribute netdevice
 * which must be in UP state.
 *
 * @attr: Pointer to the GID attribute
 *
 * Returns pointer to netdevice if the netdevice was attached to GID and
 * netdevice is in UP state. Caller must hold RCU lock as this API
 * reads the netdev flags which can change while netdevice migrates to
 * different net namespace. Returns ERR_PTR with error code otherwise.
 *
 */
struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)
{
	struct ib_gid_table_entry *entry =
		container_of(attr, struct ib_gid_table_entry, attr);
	struct ib_device *device = entry->attr.device;
	struct net_device *ndev = ERR_PTR(-EINVAL);
	u32 port_num = entry->attr.port_num;
	struct ib_gid_table *table;
	unsigned long flags;
	bool valid;

	table = rdma_gid_table(device, port_num);

	read_lock_irqsave(&table->rwlock, flags);
	valid = is_gid_entry_valid(table->data_vec[attr->index]);
	if (valid) {
		ndev = rcu_dereference(attr->ndev);
		if (!ndev)
			ndev = ERR_PTR(-ENODEV);
	}
	read_unlock_irqrestore(&table->rwlock, flags);
	return ndev;
}
EXPORT_SYMBOL(rdma_read_gid_attr_ndev_rcu);

static int get_lower_dev_vlan(struct net_device *lower_dev,
			      struct netdev_nested_priv *priv)
{
	u16 *vlan_id = (u16 *)priv->data;

	if (is_vlan_dev(lower_dev))
		*vlan_id = vlan_dev_vlan_id(lower_dev);

	/* We are interested only in the first-level vlan device, so
	 * always return 1 to stop iterating over next-level devices.
	 */
	return 1;
}

/**
 * rdma_read_gid_l2_fields - Read the vlan ID and source MAC address
 * of a GID entry.
 *
 * @attr: GID attribute pointer whose L2 fields to be read
 * @vlan_id: Pointer to vlan id to fill up if the GID entry has
 *   vlan id. It is optional.
 * @smac: Pointer to smac to fill up for a GID entry. It is optional.
 *
 * rdma_read_gid_l2_fields() returns 0 on success and returns the vlan id
 * (if the gid entry has a vlan) and the source MAC, or returns an error.
 */
int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr,
			    u16 *vlan_id, u8 *smac)
{
	struct netdev_nested_priv priv = {
		.data = (void *)vlan_id,
	};
	struct net_device *ndev;

	rcu_read_lock();
	ndev = rcu_dereference(attr->ndev);
	if (!ndev) {
		rcu_read_unlock();
		return -ENODEV;
	}
	if (smac)
		ether_addr_copy(smac, ndev->dev_addr);
	if (vlan_id) {
		*vlan_id = 0xffff;
		if (is_vlan_dev(ndev)) {
			*vlan_id = vlan_dev_vlan_id(ndev);
		} else {
			/* If the netdev is an upper device and its lower
			 * device is a vlan device, consider the vlan id of
			 * the lower vlan device for this gid entry.
			 */
			netdev_walk_all_lower_dev_rcu(attr->ndev,
						      get_lower_dev_vlan,
						      &priv);
		}
	}
	rcu_read_unlock();
	return 0;
}
EXPORT_SYMBOL(rdma_read_gid_l2_fields);
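
/*
 * Example (hypothetical caller): both output parameters are optional, and
 * vlan_id is set to 0xffff when the GID's netdev carries no vlan.
 *
 *	u16 vlan_id;
 *	u8 smac[ETH_ALEN];
 *
 *	if (!rdma_read_gid_l2_fields(attr, &vlan_id, smac) &&
 *	    vlan_id != 0xffff)
 *		pr_info("gid on vlan %u\n", vlan_id);
 */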

static int config_non_roce_gid_cache(struct ib_device *device,
				     u32 port, struct ib_port_attr *tprops)
{
	struct ib_gid_attr gid_attr = {};
	struct ib_gid_table *table;
	int ret = 0;
	int i;

	gid_attr.device = device;
	gid_attr.port_num = port;
	table = rdma_gid_table(device, port);

	mutex_lock(&table->lock);
	for (i = 0; i < tprops->gid_tbl_len; ++i) {
		if (!device->ops.query_gid)
			continue;
		ret = device->ops.query_gid(device, port, i, &gid_attr.gid);
		if (ret) {
			dev_warn(&device->dev,
				 "query_gid failed (%d) for index %d\n", ret,
				 i);
			goto err;
		}

		if (rdma_protocol_iwarp(device, port)) {
			struct net_device *ndev;

			ndev = ib_device_get_netdev(device, port);
			if (!ndev)
				continue;
			RCU_INIT_POINTER(gid_attr.ndev, ndev);
			dev_put(ndev);
		}

		gid_attr.index = i;
		tprops->subnet_prefix =
			be64_to_cpu(gid_attr.gid.global.subnet_prefix);
		add_modify_gid(table, &gid_attr);
	}
err:
	mutex_unlock(&table->lock);
	return ret;
}

static int
ib_cache_update(struct ib_device *device, u32 port, bool update_gids,
		bool update_pkeys, bool enforce_security)
{
	struct ib_port_attr *tprops = NULL;
	struct ib_pkey_cache *pkey_cache = NULL;
	struct ib_pkey_cache *old_pkey_cache = NULL;
	int i;
	int ret;

	if (!rdma_is_port_valid(device, port))
		return -EINVAL;

	tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
	if (!tprops)
		return -ENOMEM;

	ret = ib_query_port(device, port, tprops);
	if (ret) {
		dev_warn(&device->dev, "ib_query_port failed (%d)\n", ret);
		goto err;
	}

	if (!rdma_protocol_roce(device, port) && update_gids) {
		ret = config_non_roce_gid_cache(device, port,
						tprops);
		if (ret)
			goto err;
	}

	update_pkeys &= !!tprops->pkey_tbl_len;

	if (update_pkeys) {
		pkey_cache = kmalloc(struct_size(pkey_cache, table,
						 tprops->pkey_tbl_len),
				     GFP_KERNEL);
		if (!pkey_cache) {
			ret = -ENOMEM;
			goto err;
		}

		pkey_cache->table_len = tprops->pkey_tbl_len;

		for (i = 0; i < pkey_cache->table_len; ++i) {
			ret = ib_query_pkey(device, port, i,
					    pkey_cache->table + i);
			if (ret) {
				dev_warn(&device->dev,
					 "ib_query_pkey failed (%d) for index %d\n",
					 ret, i);
				goto err;
			}
		}
	}

	write_lock_irq(&device->cache_lock);

	if (update_pkeys) {
		old_pkey_cache = device->port_data[port].cache.pkey;
		device->port_data[port].cache.pkey = pkey_cache;
	}
	device->port_data[port].cache.lmc = tprops->lmc;
	device->port_data[port].cache.port_state = tprops->state;

	device->port_data[port].cache.subnet_prefix = tprops->subnet_prefix;
	write_unlock_irq(&device->cache_lock);

	if (enforce_security)
		ib_security_cache_change(device,
					 port,
					 tprops->subnet_prefix);

	kfree(old_pkey_cache);
	kfree(tprops);
	return 0;

err:
	kfree(pkey_cache);
	kfree(tprops);
	return ret;
}

static void ib_cache_event_task(struct work_struct *_work)
{
	struct ib_update_work *work =
		container_of(_work, struct ib_update_work, work);
	int ret;

	/* Before distributing the cache update event, first sync
	 * the cache.
	 */
	ret = ib_cache_update(work->event.device, work->event.element.port_num,
			      work->event.event == IB_EVENT_GID_CHANGE,
			      work->event.event == IB_EVENT_PKEY_CHANGE,
			      work->enforce_security);

	/* The GID event is already notified for individual GID entries by
	 * dispatch_gid_change_event(). Hence, notify for the rest of the
	 * events.
	 */
	if (!ret && work->event.event != IB_EVENT_GID_CHANGE)
		ib_dispatch_event_clients(&work->event);

	kfree(work);
}

static void ib_generic_event_task(struct work_struct *_work)
{
	struct ib_update_work *work =
		container_of(_work, struct ib_update_work, work);

	ib_dispatch_event_clients(&work->event);
	kfree(work);
}

static bool is_cache_update_event(const struct ib_event *event)
{
	return (event->event == IB_EVENT_PORT_ERR ||
		event->event == IB_EVENT_PORT_ACTIVE ||
		event->event == IB_EVENT_LID_CHANGE ||
		event->event == IB_EVENT_PKEY_CHANGE ||
		event->event == IB_EVENT_CLIENT_REREGISTER ||
		event->event == IB_EVENT_GID_CHANGE);
}

/**
 * ib_dispatch_event - Dispatch an asynchronous event
 * @event: Event to dispatch
 *
 * Low-level drivers must call ib_dispatch_event() to dispatch the
 * event to all registered event handlers when an asynchronous event
 * occurs.
 */
void ib_dispatch_event(const struct ib_event *event)
{
	struct ib_update_work *work;

	work = kzalloc(sizeof(*work), GFP_ATOMIC);
	if (!work)
		return;

	if (is_cache_update_event(event))
		INIT_WORK(&work->work, ib_cache_event_task);
	else
		INIT_WORK(&work->work, ib_generic_event_task);

	work->event = *event;
	if (event->event == IB_EVENT_PKEY_CHANGE ||
	    event->event == IB_EVENT_GID_CHANGE)
		work->enforce_security = true;

	queue_work(ib_wq, &work->work);
}
EXPORT_SYMBOL(ib_dispatch_event);
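
/*
 * Sketch of a driver-side caller (illustration only, variable names are
 * assumptions): the driver fills an ib_event and dispatches it;
 * cache-affecting events are synced via the work queue before clients are
 * notified.
 *
 *	struct ib_event event = {
 *		.device = ibdev,
 *		.element.port_num = port,
 *		.event = IB_EVENT_PORT_ACTIVE,
 *	};
 *
 *	ib_dispatch_event(&event);
 */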

int ib_cache_setup_one(struct ib_device *device)
{
	u32 p;
	int err;

	err = gid_table_setup_one(device);
	if (err)
		return err;

	rdma_for_each_port (device, p) {
		err = ib_cache_update(device, p, true, true, true);
		if (err)
			return err;
	}

	return 0;
}

void ib_cache_release_one(struct ib_device *device)
{
	u32 p;

	/*
	 * The release function frees all the cache elements.
	 * This function should be called as part of freeing
	 * all the device's resources when the cache could no
	 * longer be accessed.
	 */
	rdma_for_each_port (device, p)
		kfree(device->port_data[p].cache.pkey);

	gid_table_release_one(device);
}

void ib_cache_cleanup_one(struct ib_device *device)
{
	/* The cleanup function waits for all in-progress workqueue
	 * elements and cleans up the GID cache. This function should be
	 * called after the device was removed from the devices list and
	 * all clients were removed, so the cache exists but is
	 * non-functional and shouldn't be updated anymore.
	 */
	flush_workqueue(ib_wq);
	gid_table_cleanup_one(device);

	/*
	 * Flush the wq second time for any pending GID delete work.
	 */
	flush_workqueue(ib_wq);
}