1 | /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ |
2 | /* |
3 | * Copyright(c) 2016 - 2019 Intel Corporation. |
4 | */ |
5 | |
6 | #ifndef DEF_RDMA_VT_H |
7 | #define DEF_RDMA_VT_H |
8 | |
9 | /* |
10 | * Structure that low level drivers will populate in order to register with the |
11 | * rdmavt layer. |
12 | */ |
13 | |
14 | #include <linux/spinlock.h> |
15 | #include <linux/list.h> |
16 | #include <linux/hash.h> |
17 | #include <rdma/ib_verbs.h> |
18 | #include <rdma/ib_mad.h> |
19 | #include <rdma/rdmavt_mr.h> |
20 | |
/* NOTE(review): presumably the upper bound on PKEY table entries - confirm in users */
#define RVT_MAX_PKEY_VALUES 16

/* Trap bookkeeping limits; RVT_MAX_TRAP_LISTS sizes rvt_ibport.trap_lists[] */
#define RVT_MAX_TRAP_LEN 100 /* Limit pending trap list */
#define RVT_MAX_TRAP_LISTS 5 /* ((IB_NOTICE_TYPE_INFO & 0x0F) + 1) */
#define RVT_TRAP_TIMEOUT 4096 /* 4.096 usec */
26 | |
/*
 * A list head paired with a length counter.  struct rvt_ibport holds an
 * array of RVT_MAX_TRAP_LISTS of these for traps awaiting resend.
 */
struct trap_list {
	/* NOTE(review): presumably the number of entries on @list - confirm */
	u32 list_len;
	struct list_head list;
};
31 | |
/* Only pointers to these are used in this header; defined elsewhere. */
struct rvt_qp;
struct rvt_qpn_table;

/*
 * Per-port state.  struct rvt_dev_info reaches these through its ports
 * array (see rvt_get_pkey()), and rvt_init_port() takes one as an argument.
 */
struct rvt_ibport {
	/* NOTE(review): presumably the two special QPs (QP0/QP1) - confirm */
	struct rvt_qp __rcu *qp[2];
	struct ib_mad_agent *send_agent;	/* agent for SMI (traps) */
	struct rb_root mcast_tree;
	spinlock_t lock;		/* protect changes in this struct */

	/* non-zero when timer is set */
	unsigned long mkey_lease_timeout;
	unsigned long trap_timeout;
	__be64 gid_prefix;      /* in network order */
	__be64 mkey;
	u64 tid;
	u32 port_cap_flags;
	u16 port_cap3_flags;
	u32 pma_sample_start;
	u32 pma_sample_interval;
	__be16 pma_counter_select[5];
	u16 pma_tag;
	u16 mkey_lease_period;
	u32 sm_lid;
	u8 sm_sl;
	u8 mkeyprot;
	u8 subnet_timeout;
	u8 vl_high_limit;

	/*
	 * Driver is expected to keep these up to date. These
	 * counters are informational only and not required to be
	 * completely accurate.
	 */
	u64 n_rc_resends;
	u64 n_seq_naks;
	u64 n_rdma_seq;
	u64 n_rnr_naks;
	u64 n_other_naks;
	u64 n_loop_pkts;
	u64 n_pkt_drops;
	u64 n_vl15_dropped;
	u64 n_rc_timeouts;
	u64 n_dmawait;
	u64 n_unaligned;
	u64 n_rc_dupreq;
	u64 n_rc_seqnak;
	u64 n_rc_crwaits;
	u16 pkey_violations;
	u16 qkey_violations;
	u16 mkey_violations;

	/*
	 * Hot-path per CPU counters to avoid cacheline trading to update.
	 * NOTE(review): the z_* values look like baselines for the matching
	 * per-CPU counters below - confirm against the code that reads them.
	 */
	u64 z_rc_acks;
	u64 z_rc_qacks;
	u64 z_rc_delayed_comp;
	u64 __percpu *rc_acks;
	u64 __percpu *rc_qacks;
	u64 __percpu *rc_delayed_comp;

	void *priv; /* driver private data */

	/*
	 * The pkey table is allocated and maintained by the driver. Drivers
	 * need to have access to this before registering with rdmavt. However
	 * rdmavt will need access to it so drivers need to provide this during
	 * the attach port API call.
	 */
	u16 *pkey_table;

	struct rvt_ah *sm_ah;

	/*
	 * Keep a list of traps that have not been repressed.  They will be
	 * resent based on trap_timer.
	 */
	struct trap_list trap_lists[RVT_MAX_TRAP_LISTS];
	struct timer_list trap_timer;
};
109 | |
#define RVT_CQN_MAX 16 /* maximum length of cq name */

/* NOTE(review): presumably the values for rvt_driver_params.sge_copy_mode - confirm */
#define RVT_SGE_COPY_MEMCPY	0
#define RVT_SGE_COPY_CACHELESS	1
#define RVT_SGE_COPY_ADAPTIVE	2
115 | |
116 | /* |
117 | * Things that are driver specific, module parameters in hfi1 and qib |
118 | */ |
119 | struct rvt_driver_params { |
120 | struct ib_device_attr props; |
121 | |
122 | /* |
123 | * Anything driver specific that is not covered by props |
124 | * For instance special module parameters. Goes here. |
125 | */ |
126 | unsigned int lkey_table_size; |
127 | unsigned int qp_table_size; |
128 | unsigned int sge_copy_mode; |
129 | unsigned int wss_threshold; |
130 | unsigned int wss_clean_period; |
131 | int qpn_start; |
132 | int qpn_inc; |
133 | int qpn_res_start; |
134 | int qpn_res_end; |
135 | int nports; |
136 | int npkeys; |
137 | int node; |
138 | int psn_mask; |
139 | int psn_shift; |
140 | int psn_modify_mask; |
141 | u32 core_cap_flags; |
142 | u32 max_mad_size; |
143 | u8 qos_shift; |
144 | u8 max_rdma_atomic; |
145 | u8 ; |
146 | u8 reserved_operations; |
147 | }; |
148 | |
/* User context: rdmavt wrapper whose only member is the core ib_ucontext */
struct rvt_ucontext {
	struct ib_ucontext ibucontext;
};
153 | |
/*
 * Protection domain.  Embeds the core ib_pd so that ibpd_to_rvtpd() can
 * recover the rvt_pd from an ib_pd pointer.
 */
struct rvt_pd {
	struct ib_pd ibpd;
	/* NOTE(review): presumably set for PDs created for user space - confirm */
	bool user;
};
159 | |
/*
 * Address handle.  Embeds the core ib_ah so that ibah_to_rvtah() can
 * recover the rvt_ah from an ib_ah pointer.
 */
struct rvt_ah {
	struct ib_ah ibah;
	struct rdma_ah_attr attr;
	u8 vl;
	/* NOTE(review): name suggests log2 of the path MTU - confirm */
	u8 log_pmtu;
};
167 | |
/*
 * This structure is used by rvt_mmap() to validate an offset
 * when an mmap() request is made.  The vm_area_struct then uses
 * this as its vm_private_data.
 */
struct rvt_mmap_info {
	/* NOTE(review): presumably linked on rvt_dev_info.pending_mmaps - confirm */
	struct list_head pending_mmaps;
	struct ib_ucontext *context;
	void *obj;
	__u64 offset;
	struct kref ref;
	u32 size;
};
181 | |
/*
 * memory working set size
 *
 * struct rvt_dev_info holds a pointer to one of these (wss).
 * NOTE(review): threshold and clean_period presumably derive from
 * rvt_driver_params.wss_threshold / wss_clean_period - confirm.
 */
struct rvt_wss {
	unsigned long *entries;
	atomic_t total_count;
	atomic_t clean_counter;
	atomic_t clean_entry;

	int threshold;
	int num_entries;
	long pages_mask;
	unsigned int clean_period;
};
194 | |
/* Declared here because the callbacks below take pointers to them. */
struct rvt_dev_info;
struct rvt_swqe;

/*
 * Table of callbacks a driver supplies to rdmavt.  It is embedded in
 * struct rvt_dev_info as driver_f.
 */
struct rvt_driver_provided {
	/*
	 * Which functions are required depends on which verbs rdmavt is
	 * providing and which verbs the driver is overriding. See
	 * check_support() for details.
	 */

	/* hot path calldowns in a single cacheline */

	/*
	 * Give the driver a notice that there is send work to do. It is up to
	 * the driver to generally push the packets out, this just queues the
	 * work with the driver. There are two variants here. The no_lock
	 * version requires the s_lock not to be held. The other assumes the
	 * s_lock is held.
	 */
	bool (*schedule_send)(struct rvt_qp *qp);
	bool (*schedule_send_no_lock)(struct rvt_qp *qp);

	/*
	 * Driver specific work request setup and checking.
	 * This function is allowed to perform any setup, checks, or
	 * adjustments required to the SWQE in order to be usable by
	 * underlying protocols. This includes private data structure
	 * allocations.
	 */
	int (*setup_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe,
			 bool *call_send);

	/*
	 * Sometimes rdmavt needs to kick the driver's send progress. That is
	 * done by this call back.
	 */
	void (*do_send)(struct rvt_qp *qp);

	/*
	 * Returns a pointer to the underlying hardware's PCI device. This is
	 * used to display information as to what hardware is being referenced
	 * in an output message
	 */
	struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi);

	/*
	 * Allocate a private queue pair data structure for driver specific
	 * information which is opaque to rdmavt.  Errors are returned via
	 * ERR_PTR(err).  The driver is free to return NULL or a valid
	 * pointer.
	 */
	void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp);

	/*
	 * Init a structure allocated with qp_priv_alloc(). This should be
	 * called after all qp fields have been initialized in rdmavt.
	 */
	int (*qp_priv_init)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
			    struct ib_qp_init_attr *init_attr);

	/*
	 * Free the driver's private qp structure.
	 */
	void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp);

	/*
	 * Inform the driver the particular qp in question has been reset so
	 * that it can clean up anything it needs to.
	 */
	void (*notify_qp_reset)(struct rvt_qp *qp);

	/*
	 * Get a path mtu from the driver based on qp attributes.
	 */
	int (*get_pmtu_from_attr)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
				  struct ib_qp_attr *attr);

	/*
	 * Notify driver that it needs to flush any outstanding IO requests that
	 * are waiting on a qp.
	 */
	void (*flush_qp_waiters)(struct rvt_qp *qp);

	/*
	 * Notify driver to stop its queue of sending packets. Nothing else
	 * should be posted to the queue pair after this has been called.
	 */
	void (*stop_send_queue)(struct rvt_qp *qp);

	/*
	 * Have the driver drain any in progress operations
	 */
	void (*quiesce_qp)(struct rvt_qp *qp);

	/*
	 * Inform the driver a qp has gone to error state.
	 */
	void (*notify_error_qp)(struct rvt_qp *qp);

	/*
	 * Get an MTU for a qp.
	 */
	u32 (*mtu_from_qp)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
			   u32 pmtu);
	/*
	 * Convert an mtu to a path mtu
	 */
	int (*mtu_to_path_mtu)(u32 mtu);

	/*
	 * Get the guid of a port in big endian byte order
	 */
	int (*get_guid_be)(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
			   int guid_index, __be64 *guid);

	/*
	 * Query driver for the state of the port.
	 */
	int (*query_port_state)(struct rvt_dev_info *rdi, u32 port_num,
				struct ib_port_attr *props);

	/*
	 * Tell driver to shutdown a port
	 */
	int (*shut_down_port)(struct rvt_dev_info *rdi, u32 port_num);

	/* Tell driver to send a trap for changed port capabilities */
	void (*cap_mask_chg)(struct rvt_dev_info *rdi, u32 port_num);

	/*
	 * The following functions can be safely ignored completely. Any use of
	 * these is checked for NULL before blindly calling. Rdmavt should also
	 * be functional if drivers omit these.
	 */

	/* Called to inform the driver that all qps should now be freed. */
	unsigned (*free_all_qps)(struct rvt_dev_info *rdi);

	/* Driver specific AH validation */
	int (*check_ah)(struct ib_device *, struct rdma_ah_attr *);

	/* Inform the driver a new AH has been created */
	void (*notify_new_ah)(struct ib_device *, struct rdma_ah_attr *,
			      struct rvt_ah *);

	/* Let the driver pick the next queue pair number */
	int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
			 enum ib_qp_type type, u32 port_num);

	/* Determine if it's safe or allowed to modify the qp */
	int (*check_modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr,
			       int attr_mask, struct ib_udata *udata);

	/* Driver specific QP modification/notification-of */
	void (*modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr,
			  int attr_mask, struct ib_udata *udata);

	/* Notify driver a mad agent has been created */
	void (*notify_create_mad_agent)(struct rvt_dev_info *rdi, int port_idx);

	/* Notify driver a mad agent has been removed */
	void (*notify_free_mad_agent)(struct rvt_dev_info *rdi, int port_idx);

	/* Notify driver to restart rc */
	void (*notify_restart_rc)(struct rvt_qp *qp, u32 psn, int wait);

	/* Get and return CPU to pin CQ processing thread */
	int (*comp_vect_cpu_lookup)(struct rvt_dev_info *rdi, int comp_vect);
};
363 | |
/*
 * Top-level rdmavt device.  It embeds the core ib_device as its first
 * member (ib_to_rvt() converts back from an ib_device pointer) and carries
 * the driver parameters, driver callbacks and rdmavt's resource accounting.
 */
struct rvt_dev_info {
	struct ib_device ibdev; /* Keep this first. Nothing above here */

	/*
	 * Prior to calling for registration the driver will be responsible for
	 * allocating space for this structure.
	 *
	 * The driver will also be responsible for filling in certain members of
	 * dparms.props. The driver needs to fill in dparms exactly as it would
	 * want values reported to a ULP. This will be returned to the caller
	 * in rdmavt's device. The driver should also therefore refrain from
	 * modifying this directly after registration with rdmavt.
	 */

	/* Driver specific properties */
	struct rvt_driver_params dparms;

	/* post send table */
	const struct rvt_operation_params *post_parms;

	/* opcode translation table */
	const enum ib_wc_opcode *wc_opcode;

	/* Driver specific helper functions */
	struct rvt_driver_provided driver_f;

	struct rvt_mregion __rcu *dma_mr;
	struct rvt_lkey_table lkey_table;

	/* Internal use */
	int n_pds_allocated;
	spinlock_t n_pds_lock; /* Protect pd allocated count */

	int n_ahs_allocated;
	spinlock_t n_ahs_lock; /* Protect ah allocated count */

	u32 n_srqs_allocated;
	spinlock_t n_srqs_lock; /* Protect srqs allocated count */

	int flags;
	/* Array of per-port state pointers; rvt_get_pkey() indexes it by port */
	struct rvt_ibport **ports;

	/* QP */
	struct rvt_qp_ibdev *qp_dev;
	u32 n_qps_allocated;    /* number of QPs allocated for device */
	u32 n_rc_qps;		/* number of RC QPs allocated for device */
	u32 busy_jiffies;	/* timeout scaling based on RC QP count */
	spinlock_t n_qps_lock;	/* protect qps, rc qps and busy jiffy counts */

	/* memory maps */
	struct list_head pending_mmaps;
	spinlock_t mmap_offset_lock; /* protect mmap_offset */
	u32 mmap_offset;
	spinlock_t pending_lock; /* protect pending mmap list */

	/* CQ */
	u32 n_cqs_allocated;    /* number of CQs allocated for device */
	spinlock_t n_cqs_lock; /* protect count of in use cqs */

	/* Multicast */
	u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
	/* NOTE(review): presumably protects n_mcast_grps_allocated - confirm */
	spinlock_t n_mcast_grps_lock;

	/* Memory Working Set Size */
	struct rvt_wss *wss;
};
430 | |
431 | /** |
432 | * rvt_set_ibdev_name - Craft an IB device name from client info |
433 | * @rdi: pointer to the client rvt_dev_info structure |
434 | * @name: client specific name |
435 | * @unit: client specific unit number. |
436 | */ |
437 | static inline void rvt_set_ibdev_name(struct rvt_dev_info *rdi, |
438 | const char *fmt, const char *name, |
439 | const int unit) |
440 | { |
441 | /* |
442 | * FIXME: rvt and its users want to touch the ibdev before |
443 | * registration and have things like the name work. We don't have the |
444 | * infrastructure in the core to support this directly today, hack it |
445 | * to work by setting the name manually here. |
446 | */ |
447 | dev_set_name(dev: &rdi->ibdev.dev, name: fmt, name, unit); |
448 | strscpy(rdi->ibdev.name, dev_name(&rdi->ibdev.dev), IB_DEVICE_NAME_MAX); |
449 | } |
450 | |
451 | /** |
452 | * rvt_get_ibdev_name - return the IB name |
453 | * @rdi: rdmavt device |
454 | * |
455 | * Return the registered name of the device. |
456 | */ |
457 | static inline const char *rvt_get_ibdev_name(const struct rvt_dev_info *rdi) |
458 | { |
459 | return dev_name(dev: &rdi->ibdev.dev); |
460 | } |
461 | |
462 | static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) |
463 | { |
464 | return container_of(ibpd, struct rvt_pd, ibpd); |
465 | } |
466 | |
467 | static inline struct rvt_ah *ibah_to_rvtah(struct ib_ah *ibah) |
468 | { |
469 | return container_of(ibah, struct rvt_ah, ibah); |
470 | } |
471 | |
472 | static inline struct rvt_dev_info *ib_to_rvt(struct ib_device *ibdev) |
473 | { |
474 | return container_of(ibdev, struct rvt_dev_info, ibdev); |
475 | } |
476 | |
477 | static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi) |
478 | { |
479 | /* |
480 | * All ports have same number of pkeys. |
481 | */ |
482 | return rdi->dparms.npkeys; |
483 | } |
484 | |
485 | /* |
486 | * Return the max atomic suitable for determining |
487 | * the size of the ack ring buffer in a QP. |
488 | */ |
489 | static inline unsigned int rvt_max_atomic(struct rvt_dev_info *rdi) |
490 | { |
491 | return rdi->dparms.max_rdma_atomic + |
492 | rdi->dparms.extra_rdma_atomic + 1; |
493 | } |
494 | |
495 | static inline unsigned int rvt_size_atomic(struct rvt_dev_info *rdi) |
496 | { |
497 | return rdi->dparms.max_rdma_atomic + |
498 | rdi->dparms.extra_rdma_atomic; |
499 | } |
500 | |
501 | /* |
502 | * Return the indexed PKEY from the port PKEY table. |
503 | */ |
504 | static inline u16 rvt_get_pkey(struct rvt_dev_info *rdi, |
505 | int port_index, |
506 | unsigned index) |
507 | { |
508 | if (index >= rvt_get_npkeys(rdi)) |
509 | return 0; |
510 | else |
511 | return rdi->ports[port_index]->pkey_table[index]; |
512 | } |
513 | |
/*
 * Functions declared for users of this header; their definitions are not
 * part of this file.
 */

/* Device allocation and registration */
struct rvt_dev_info *rvt_alloc_device(size_t size, int nports);
void rvt_dealloc_device(struct rvt_dev_info *rdi);
int rvt_register_device(struct rvt_dev_info *rvd);
void rvt_unregister_device(struct rvt_dev_info *rvd);

/* Address handle validation and port setup */
int rvt_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr);
int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port,
		  int port_index, u16 *pkey_table);

/* Memory region / key helpers */
int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key,
		    int access);
int rvt_invalidate_rkey(struct rvt_qp *qp, u32 rkey);
int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
		u32 len, u64 vaddr, u32 rkey, int acc);
int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
		struct rvt_sge *isge, struct rvt_sge *last_sge,
		struct ib_sge *sge, int acc);

/* Multicast lookup */
struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid,
				 u16 lid);
531 | |
532 | #endif /* DEF_RDMA_VT_H */ |
533 | |