// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2017 - 2019 Cambridge Greys Limited
 * Copyright (C) 2011 - 2014 Cisco Systems Inc
 * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
 * James Leu (jleu@mindspring.net).
 * Copyright (C) 2001 by various other people who didn't put their name here.
 */

#include <linux/memblock.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/inetdevice.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/platform_device.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/firmware.h>
#include <linux/fs.h>
#include <uapi/linux/filter.h>
#include <init.h>
#include <irq_kern.h>
#include <irq_user.h>
#include <net_kern.h>
#include <os.h>
#include "mconsole_kern.h"
#include "vector_user.h"
#include "vector_kern.h"

/*
 * Adapted from network devices with the following major changes:
 * All transports are static - simplifies the code significantly
 * Multiple FDs/IRQs per device
 * Vector IO optionally used for read/write, falling back to legacy
 * based on configuration and/or availability
 * Configuration is no longer positional - L2TPv3 and GRE require up to
 * 10 parameters, passing this as positional is not fit for purpose.
 * Only socket transports are supported
 */

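/* Illustrative example (not part of the original source): a device is
 * configured as a named option list on the UML command line or via
 * mconsole, e.g.
 *   vec0:transport=raw,mac=52:54:00:12:34:56,mtu=1500,depth=128,gro=1
 * The MAC address and values above are placeholders; see the argument
 * accessors below for the recognised options and their defaults.
 */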

#define DRIVER_NAME "uml-vector"
struct vector_cmd_line_arg {
	struct list_head list;
	int unit;
	char *arguments;
};

struct vector_device {
	struct list_head list;
	struct net_device *dev;
	struct platform_device pdev;
	int unit;
	int opened;
};

static LIST_HEAD(vec_cmd_line);

static DEFINE_SPINLOCK(vector_devices_lock);
static LIST_HEAD(vector_devices);

static int driver_registered;

static void vector_eth_configure(int n, struct arglist *def);
static int vector_mmsg_rx(struct vector_private *vp, int budget);

/* Argument accessors to set variables (and/or set default values)
 * mtu, buffer sizing, default headroom, etc
 */

#define DEFAULT_HEADROOM 2
#define SAFETY_MARGIN 32
#define DEFAULT_VECTOR_SIZE 64
#define TX_SMALL_PACKET 128
#define MAX_IOV_SIZE (MAX_SKB_FRAGS + 1)

static const struct {
	const char string[ETH_GSTRING_LEN];
} ethtool_stats_keys[] = {
	{ "rx_queue_max" },
	{ "rx_queue_running_average" },
	{ "tx_queue_max" },
	{ "tx_queue_running_average" },
	{ "rx_encaps_errors" },
	{ "tx_timeout_count" },
	{ "tx_restart_queue" },
	{ "tx_kicks" },
	{ "tx_flow_control_xon" },
	{ "tx_flow_control_xoff" },
	{ "rx_csum_offload_good" },
	{ "rx_csum_offload_errors" },
	{ "sg_ok" },
	{ "sg_linearized" },
};

#define VECTOR_NUM_STATS ARRAY_SIZE(ethtool_stats_keys)

static void vector_reset_stats(struct vector_private *vp)
{
	vp->estats.rx_queue_max = 0;
	vp->estats.rx_queue_running_average = 0;
	vp->estats.tx_queue_max = 0;
	vp->estats.tx_queue_running_average = 0;
	vp->estats.rx_encaps_errors = 0;
	vp->estats.tx_timeout_count = 0;
	vp->estats.tx_restart_queue = 0;
	vp->estats.tx_kicks = 0;
	vp->estats.tx_flow_control_xon = 0;
	vp->estats.tx_flow_control_xoff = 0;
	vp->estats.sg_ok = 0;
	vp->estats.sg_linearized = 0;
}

static int get_mtu(struct arglist *def)
{
	char *mtu = uml_vector_fetch_arg(def, "mtu");
	long result;

	if (mtu != NULL) {
		if (kstrtoul(mtu, 10, &result) == 0)
			if ((result < (1 << 16) - 1) && (result >= 576))
				return result;
	}
	return ETH_MAX_PACKET;
}

static char *get_bpf_file(struct arglist *def)
{
	return uml_vector_fetch_arg(def, "bpffile");
}

static bool get_bpf_flash(struct arglist *def)
{
	char *allow = uml_vector_fetch_arg(def, "bpfflash");
	long result;

	if (allow != NULL) {
		if (kstrtoul(allow, 10, &result) == 0)
			return result > 0;
	}
	return false;
}

static int get_depth(struct arglist *def)
{
	char *mtu = uml_vector_fetch_arg(def, "depth");
	long result;

	if (mtu != NULL) {
		if (kstrtoul(mtu, 10, &result) == 0)
			return result;
	}
	return DEFAULT_VECTOR_SIZE;
}

static int get_headroom(struct arglist *def)
{
	char *mtu = uml_vector_fetch_arg(def, "headroom");
	long result;

	if (mtu != NULL) {
		if (kstrtoul(mtu, 10, &result) == 0)
			return result;
	}
	return DEFAULT_HEADROOM;
}

static int get_req_size(struct arglist *def)
{
	char *gro = uml_vector_fetch_arg(def, "gro");
	long result;

	if (gro != NULL) {
		if (kstrtoul(gro, 10, &result) == 0) {
			if (result > 0)
				return 65536;
		}
	}
	return get_mtu(def) + ETH_HEADER_OTHER +
		get_headroom(def) + SAFETY_MARGIN;
}


static int get_transport_options(struct arglist *def)
{
	char *transport = uml_vector_fetch_arg(def, "transport");
	char *vector = uml_vector_fetch_arg(def, "vec");

	int vec_rx = VECTOR_RX;
	int vec_tx = VECTOR_TX;
	long parsed;
	int result = 0;

	if (transport == NULL)
		return -EINVAL;

	if (vector != NULL) {
		if (kstrtoul(vector, 10, &parsed) == 0) {
			if (parsed == 0) {
				vec_rx = 0;
				vec_tx = 0;
			}
		}
	}

	if (get_bpf_flash(def))
		result = VECTOR_BPF_FLASH;

	if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
		return result;
	if (strncmp(transport, TRANS_HYBRID, TRANS_HYBRID_LEN) == 0)
		return (result | vec_rx | VECTOR_BPF);
	if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
		return (result | vec_rx | vec_tx | VECTOR_QDISC_BYPASS);
	return (result | vec_rx | vec_tx);
}


/* A mini-buffer for packet drop read
 * All of our supported transports are datagram oriented and we always
 * read using recvmsg or recvmmsg. If we pass a buffer which is smaller
 * than the packet size it still counts as a full packet read and will
 * clean the incoming stream to keep sigio/epoll happy
 */

#define DROP_BUFFER_SIZE 32

static char *drop_buffer;

/* Array backed queues optimized for bulk enqueue/dequeue and
 * 1:N (small values of N) or 1:1 enqueuer/dequeuer ratios.
 * For more details and full design rationale see
 * http://foswiki.cambridgegreys.com/Main/EatYourTailAndEnjoyIt
 */


/*
 * Advance the mmsg queue head by n = advance. Resets the queue to
 * maximum enqueue/dequeue-at-once capacity if possible. Called by
 * dequeuers. Caller must hold the head_lock!
 */

static int vector_advancehead(struct vector_queue *qi, int advance)
{
	int queue_depth;

	qi->head =
		(qi->head + advance)
			% qi->max_depth;


	spin_lock(&qi->tail_lock);
	qi->queue_depth -= advance;

	/* we are at 0, use this to
	 * reset head and tail so we can use max size vectors
	 */

	if (qi->queue_depth == 0) {
		qi->head = 0;
		qi->tail = 0;
	}
	queue_depth = qi->queue_depth;
	spin_unlock(&qi->tail_lock);
	return queue_depth;
}

/* Advance the queue tail by n = advance.
 * This is called by enqueuers which should hold the
 * tail lock already
 */

static int vector_advancetail(struct vector_queue *qi, int advance)
{
	int queue_depth;

	qi->tail =
		(qi->tail + advance)
			% qi->max_depth;
	spin_lock(&qi->head_lock);
	qi->queue_depth += advance;
	queue_depth = qi->queue_depth;
	spin_unlock(&qi->head_lock);
	return queue_depth;
}

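/*
 * Build the iovec for a packet to be transmitted: an optional overlay
 * header formed via the transport callback, the linear part of the skb
 * and any paged fragments. Returns the number of iov entries used, or
 * -1 if an oversized skb could not be linearized.
 */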
static int prep_msg(struct vector_private *vp,
	struct sk_buff *skb,
	struct iovec *iov)
{
	int iov_index = 0;
	int nr_frags, frag;
	skb_frag_t *skb_frag;

	nr_frags = skb_shinfo(skb)->nr_frags;
	if (nr_frags > MAX_IOV_SIZE) {
		if (skb_linearize(skb) != 0)
			goto drop;
	}
	if (vp->header_size > 0) {
		iov[iov_index].iov_len = vp->header_size;
		vp->form_header(iov[iov_index].iov_base, skb, vp);
		iov_index++;
	}
	iov[iov_index].iov_base = skb->data;
	if (nr_frags > 0) {
		iov[iov_index].iov_len = skb->len - skb->data_len;
		vp->estats.sg_ok++;
	} else
		iov[iov_index].iov_len = skb->len;
	iov_index++;
	for (frag = 0; frag < nr_frags; frag++) {
		skb_frag = &skb_shinfo(skb)->frags[frag];
		iov[iov_index].iov_base = skb_frag_address_safe(skb_frag);
		iov[iov_index].iov_len = skb_frag_size(skb_frag);
		iov_index++;
	}
	return iov_index;
drop:
	return -1;
}
/*
 * Generic vector enqueue with support for forming headers using transport
 * specific callback. Allows GRE, L2TPv3, RAW and other transports
 * to use a common enqueue procedure in vector mode
 */

static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb)
{
	struct vector_private *vp = netdev_priv(qi->dev);
	int queue_depth;
	int packet_len;
	struct mmsghdr *mmsg_vector = qi->mmsg_vector;
	int iov_count;

	spin_lock(&qi->tail_lock);
	spin_lock(&qi->head_lock);
	queue_depth = qi->queue_depth;
	spin_unlock(&qi->head_lock);

	if (skb)
		packet_len = skb->len;

	if (queue_depth < qi->max_depth) {

		*(qi->skbuff_vector + qi->tail) = skb;
		mmsg_vector += qi->tail;
		iov_count = prep_msg(
			vp,
			skb,
			mmsg_vector->msg_hdr.msg_iov
		);
		if (iov_count < 1)
			goto drop;
		mmsg_vector->msg_hdr.msg_iovlen = iov_count;
		mmsg_vector->msg_hdr.msg_name = vp->fds->remote_addr;
		mmsg_vector->msg_hdr.msg_namelen = vp->fds->remote_addr_size;
		queue_depth = vector_advancetail(qi, 1);
	} else
		goto drop;
	spin_unlock(&qi->tail_lock);
	return queue_depth;
drop:
	qi->dev->stats.tx_dropped++;
	if (skb != NULL) {
		packet_len = skb->len;
		dev_consume_skb_any(skb);
		netdev_completed_queue(qi->dev, 1, packet_len);
	}
	spin_unlock(&qi->tail_lock);
	return queue_depth;
}

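/*
 * Free the skbs for the first "count" entries at the queue head after a
 * successful sendmmsg, update interface and BQL statistics and advance
 * the queue head accordingly.
 */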
static int consume_vector_skbs(struct vector_queue *qi, int count)
{
	struct sk_buff *skb;
	int skb_index;
	int bytes_compl = 0;

	for (skb_index = qi->head; skb_index < qi->head + count; skb_index++) {
		skb = *(qi->skbuff_vector + skb_index);
		/* mark as empty to ensure correct destruction if
		 * needed
		 */
		bytes_compl += skb->len;
		*(qi->skbuff_vector + skb_index) = NULL;
		dev_consume_skb_any(skb);
	}
	qi->dev->stats.tx_bytes += bytes_compl;
	qi->dev->stats.tx_packets += count;
	netdev_completed_queue(qi->dev, count, bytes_compl);
	return vector_advancehead(qi, count);
}

/*
 * Generic vector dequeue via sendmmsg with support for forming headers
 * using transport specific callback. Allows GRE, L2TPv3, RAW and
 * other transports to use a common dequeue procedure in vector mode
 */


static int vector_send(struct vector_queue *qi)
{
	struct vector_private *vp = netdev_priv(qi->dev);
	struct mmsghdr *send_from;
	int result = 0, send_len, queue_depth = qi->max_depth;

	if (spin_trylock(&qi->head_lock)) {
		if (spin_trylock(&qi->tail_lock)) {
			/* update queue_depth to current value */
			queue_depth = qi->queue_depth;
			spin_unlock(&qi->tail_lock);
			while (queue_depth > 0) {
				/* Calculate the start of the vector */
				send_len = queue_depth;
				send_from = qi->mmsg_vector;
				send_from += qi->head;
				/* Adjust vector size if wraparound */
				if (send_len + qi->head > qi->max_depth)
					send_len = qi->max_depth - qi->head;
				/* Try to TX as many packets as possible */
				if (send_len > 0) {
					result = uml_vector_sendmmsg(
						vp->fds->tx_fd,
						send_from,
						send_len,
						0
					);
					vp->in_write_poll =
						(result != send_len);
				}
				/* For some of the sendmmsg error scenarios
				 * we may end up unsure of the TX success
				 * for all packets. It is safer to declare
				 * them all TX-ed and blame the network.
				 */
				if (result < 0) {
					if (net_ratelimit())
						netdev_err(vp->dev, "sendmmsg err=%i\n",
							result);
					vp->in_error = true;
					result = send_len;
				}
				if (result > 0) {
					queue_depth =
						consume_vector_skbs(qi, result);
					/* This is equivalent to a TX IRQ.
					 * Restart the upper layers to feed us
					 * more packets.
					 */
					if (result > vp->estats.tx_queue_max)
						vp->estats.tx_queue_max = result;
					vp->estats.tx_queue_running_average =
						(vp->estats.tx_queue_running_average + result) >> 1;
				}
				netif_wake_queue(qi->dev);
				/* if TX is busy, break out of the send loop,
				 * poll write IRQ will reschedule xmit for us
				 */
				if (result != send_len) {
					vp->estats.tx_restart_queue++;
					break;
				}
			}
		}
		spin_unlock(&qi->head_lock);
	}
	return queue_depth;
}

/* Queue destructor. Deliberately stateless so we can use
 * it in queue cleanup if initialization fails.
 */

static void destroy_queue(struct vector_queue *qi)
{
	int i;
	struct iovec *iov;
	struct vector_private *vp = netdev_priv(qi->dev);
	struct mmsghdr *mmsg_vector;

	if (qi == NULL)
		return;
	/* deallocate any skbuffs - we rely on any unused to be
	 * set to NULL.
	 */
	if (qi->skbuff_vector != NULL) {
		for (i = 0; i < qi->max_depth; i++) {
			if (*(qi->skbuff_vector + i) != NULL)
				dev_kfree_skb_any(*(qi->skbuff_vector + i));
		}
		kfree(qi->skbuff_vector);
	}
	/* deallocate matching IOV structures including header buffs */
	if (qi->mmsg_vector != NULL) {
		mmsg_vector = qi->mmsg_vector;
		for (i = 0; i < qi->max_depth; i++) {
			iov = mmsg_vector->msg_hdr.msg_iov;
			if (iov != NULL) {
				if ((vp->header_size > 0) &&
					(iov->iov_base != NULL))
					kfree(iov->iov_base);
				kfree(iov);
			}
			mmsg_vector++;
		}
		kfree(qi->mmsg_vector);
	}
	kfree(qi);
}

/*
 * Queue constructor. Create a queue with a given size.
 */
static struct vector_queue *create_queue(
	struct vector_private *vp,
	int max_size,
	int header_size,
	int num_extra_frags)
{
	struct vector_queue *result;
	int i;
	struct iovec *iov;
	struct mmsghdr *mmsg_vector;

	result = kmalloc(sizeof(struct vector_queue), GFP_KERNEL);
	if (result == NULL)
		return NULL;
	result->max_depth = max_size;
	result->dev = vp->dev;
	result->mmsg_vector = kmalloc(
		(sizeof(struct mmsghdr) * max_size), GFP_KERNEL);
	if (result->mmsg_vector == NULL)
		goto out_mmsg_fail;
	result->skbuff_vector = kmalloc(
		(sizeof(void *) * max_size), GFP_KERNEL);
	if (result->skbuff_vector == NULL)
		goto out_skb_fail;

	/* further failures can be handled safely by destroy_queue*/

	mmsg_vector = result->mmsg_vector;
	for (i = 0; i < max_size; i++) {
		/* Clear all pointers - we use non-NULL as marking on
		 * what to free on destruction
		 */
		*(result->skbuff_vector + i) = NULL;
		mmsg_vector->msg_hdr.msg_iov = NULL;
		mmsg_vector++;
	}
	mmsg_vector = result->mmsg_vector;
	result->max_iov_frags = num_extra_frags;
	for (i = 0; i < max_size; i++) {
		if (vp->header_size > 0)
			iov = kmalloc_array(3 + num_extra_frags,
				sizeof(struct iovec),
				GFP_KERNEL
			);
		else
			iov = kmalloc_array(2 + num_extra_frags,
				sizeof(struct iovec),
				GFP_KERNEL
			);
		if (iov == NULL)
			goto out_fail;
		mmsg_vector->msg_hdr.msg_iov = iov;
		mmsg_vector->msg_hdr.msg_iovlen = 1;
		mmsg_vector->msg_hdr.msg_control = NULL;
		mmsg_vector->msg_hdr.msg_controllen = 0;
		mmsg_vector->msg_hdr.msg_flags = MSG_DONTWAIT;
		mmsg_vector->msg_hdr.msg_name = NULL;
		mmsg_vector->msg_hdr.msg_namelen = 0;
		if (vp->header_size > 0) {
			iov->iov_base = kmalloc(header_size, GFP_KERNEL);
			if (iov->iov_base == NULL)
				goto out_fail;
			iov->iov_len = header_size;
			mmsg_vector->msg_hdr.msg_iovlen = 2;
			iov++;
		}
		iov->iov_base = NULL;
		iov->iov_len = 0;
		mmsg_vector++;
	}
	spin_lock_init(&result->head_lock);
	spin_lock_init(&result->tail_lock);
	result->queue_depth = 0;
	result->head = 0;
	result->tail = 0;
	return result;
out_skb_fail:
	kfree(result->mmsg_vector);
out_mmsg_fail:
	kfree(result);
	return NULL;
out_fail:
	destroy_queue(result);
	return NULL;
}

/*
 * We do not use the RX queue as a proper wraparound queue for now
 * This is not necessary because the consumption via napi_gro_receive()
 * happens in-line. While we can try using the return code of
 * netif_rx() for flow control there are no drivers doing this today.
 * For this RX specific use we ignore the tail/head locks and
 * just read into a prepared queue filled with skbuffs.
 */

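/*
 * Allocate a fresh skb sized for the current receive requirements and
 * describe its linear part and paged fragments in the supplied msghdr
 * iov so recvmsg/recvmmsg can fill it directly. On allocation failure
 * the data iov entry is left empty, turning the read into a drop.
 */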
static struct sk_buff *prep_skb(
	struct vector_private *vp,
	struct user_msghdr *msg)
{
	int linear = vp->max_packet + vp->headroom + SAFETY_MARGIN;
	struct sk_buff *result;
	int iov_index = 0, len;
	struct iovec *iov = msg->msg_iov;
	int err, nr_frags, frag;
	skb_frag_t *skb_frag;

	if (vp->req_size <= linear)
		len = linear;
	else
		len = vp->req_size;
	result = alloc_skb_with_frags(
		linear,
		len - vp->max_packet,
		3,
		&err,
		GFP_ATOMIC
	);
	if (vp->header_size > 0)
		iov_index++;
	if (result == NULL) {
		iov[iov_index].iov_base = NULL;
		iov[iov_index].iov_len = 0;
		goto done;
	}
	skb_reserve(result, vp->headroom);
	result->dev = vp->dev;
	skb_put(result, vp->max_packet);
	result->data_len = len - vp->max_packet;
	result->len += len - vp->max_packet;
	skb_reset_mac_header(result);
	result->ip_summed = CHECKSUM_NONE;
	iov[iov_index].iov_base = result->data;
	iov[iov_index].iov_len = vp->max_packet;
	iov_index++;

	nr_frags = skb_shinfo(result)->nr_frags;
	for (frag = 0; frag < nr_frags; frag++) {
		skb_frag = &skb_shinfo(result)->frags[frag];
		iov[iov_index].iov_base = skb_frag_address_safe(skb_frag);
		if (iov[iov_index].iov_base != NULL)
			iov[iov_index].iov_len = skb_frag_size(skb_frag);
		else
			iov[iov_index].iov_len = 0;
		iov_index++;
	}
done:
	msg->msg_iovlen = iov_index;
	return result;
}


/* Prepare queue for recvmmsg one-shot rx - fill with fresh sk_buffs */

static void prep_queue_for_rx(struct vector_queue *qi)
{
	struct vector_private *vp = netdev_priv(qi->dev);
	struct mmsghdr *mmsg_vector = qi->mmsg_vector;
	void **skbuff_vector = qi->skbuff_vector;
	int i;

	if (qi->queue_depth == 0)
		return;
	for (i = 0; i < qi->queue_depth; i++) {
		/* it is OK if allocation fails - recvmmsg with NULL data in
		 * iov argument still performs an RX, just drops the packet.
		 * This allows us to stop faffing around with a "drop buffer"
		 */

		*skbuff_vector = prep_skb(vp, &mmsg_vector->msg_hdr);
		skbuff_vector++;
		mmsg_vector++;
	}
	qi->queue_depth = 0;
}

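/* Look up a vector device by unit number under vector_devices_lock */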
static struct vector_device *find_device(int n)
{
	struct vector_device *device;
	struct list_head *ele;

	spin_lock(&vector_devices_lock);
	list_for_each(ele, &vector_devices) {
		device = list_entry(ele, struct vector_device, list);
		if (device->unit == n)
			goto out;
	}
	device = NULL;
out:
	spin_unlock(&vector_devices_lock);
	return device;
}

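/*
 * Split a "<unit>:<options>" specification (from the kernel command line
 * or mconsole) into the device number and the option string, rejecting
 * malformed input and already configured device numbers.
 */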
static int vector_parse(char *str, int *index_out, char **str_out,
			char **error_out)
{
	int n, len, err;
	char *start = str;

	len = strlen(str);

	while ((*str != ':') && (strlen(str) > 1))
		str++;
	if (*str != ':') {
		*error_out = "Expected ':' after device number";
		return -EINVAL;
	}
	*str = '\0';

	err = kstrtouint(start, 0, &n);
	if (err < 0) {
		*error_out = "Bad device number";
		return err;
	}

	str++;
	if (find_device(n)) {
		*error_out = "Device already configured";
		return -EINVAL;
	}

	*index_out = n;
	*str_out = str;
	return 0;
}

static int vector_config(char *str, char **error_out)
{
	int err, n;
	char *params;
	struct arglist *parsed;

	err = vector_parse(str, &n, &params, error_out);
	if (err != 0)
		return err;

	/* This string is broken up and the pieces used by the underlying
	 * driver. We should copy it to make sure things do not go wrong
	 * later.
	 */

	params = kstrdup(params, GFP_KERNEL);
	if (params == NULL) {
		*error_out = "vector_config failed to strdup string";
		return -ENOMEM;
	}

	parsed = uml_parse_vector_ifspec(params);

	if (parsed == NULL) {
		*error_out = "vector_config failed to parse parameters";
		kfree(params);
		return -EINVAL;
	}

	vector_eth_configure(n, parsed);
	return 0;
}

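/* mconsole "id" callback - parse the device number out of the request */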
static int vector_id(char **str, int *start_out, int *end_out)
{
	char *end;
	int n;

	n = simple_strtoul(*str, &end, 0);
	if ((*end != '\0') || (end == *str))
		return -1;

	*start_out = n;
	*end_out = n;
	*str = end;
	return n;
}

static int vector_remove(int n, char **error_out)
{
	struct vector_device *vec_d;
	struct net_device *dev;
	struct vector_private *vp;

	vec_d = find_device(n);
	if (vec_d == NULL)
		return -ENODEV;
	dev = vec_d->dev;
	vp = netdev_priv(dev);
	if (vp->fds != NULL)
		return -EBUSY;
	unregister_netdev(dev);
	platform_device_unregister(&vec_d->pdev);
	return 0;
}

/*
 * There is no shared per-transport initialization code, so
 * we will just initialize each interface one by one and
 * add them to a list
 */

static struct platform_driver uml_net_driver = {
	.driver = {
		.name = DRIVER_NAME,
	},
};


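/* Platform device release callback - drop the device bookkeeping and
 * free the netdev once the driver core is done with it.
 */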
static void vector_device_release(struct device *dev)
{
	struct vector_device *device = dev_get_drvdata(dev);
	struct net_device *netdev = device->dev;

	list_del(&device->list);
	kfree(device);
	free_netdev(netdev);
}

/* Bog standard recv using recvmsg - not used normally unless the user
 * explicitly specifies not to use recvmmsg vector RX.
 */

static int vector_legacy_rx(struct vector_private *vp)
{
	int pkt_len;
	struct user_msghdr hdr;
	struct iovec iov[2 + MAX_IOV_SIZE]; /* header + data use case only */
	int iovpos = 0;
	struct sk_buff *skb;
	int header_check;

	hdr.msg_name = NULL;
	hdr.msg_namelen = 0;
	hdr.msg_iov = (struct iovec *) &iov;
	hdr.msg_control = NULL;
	hdr.msg_controllen = 0;
	hdr.msg_flags = 0;

	if (vp->header_size > 0) {
		iov[0].iov_base = vp->header_rxbuffer;
		iov[0].iov_len = vp->header_size;
	}

	skb = prep_skb(vp, &hdr);

	if (skb == NULL) {
		/* Read a packet into drop_buffer and don't do
		 * anything with it.
		 */
		iov[iovpos].iov_base = drop_buffer;
		iov[iovpos].iov_len = DROP_BUFFER_SIZE;
		hdr.msg_iovlen = 1;
		vp->dev->stats.rx_dropped++;
	}

	pkt_len = uml_vector_recvmsg(vp->fds->rx_fd, &hdr, 0);
	if (pkt_len < 0) {
		vp->in_error = true;
		return pkt_len;
	}

	if (skb != NULL) {
		if (pkt_len > vp->header_size) {
			if (vp->header_size > 0) {
				header_check = vp->verify_header(
					vp->header_rxbuffer, skb, vp);
				if (header_check < 0) {
					dev_kfree_skb_irq(skb);
					vp->dev->stats.rx_dropped++;
					vp->estats.rx_encaps_errors++;
					return 0;
				}
				if (header_check > 0) {
					vp->estats.rx_csum_offload_good++;
					skb->ip_summed = CHECKSUM_UNNECESSARY;
				}
			}
			pskb_trim(skb, pkt_len - vp->rx_header_size);
			skb->protocol = eth_type_trans(skb, skb->dev);
			vp->dev->stats.rx_bytes += skb->len;
			vp->dev->stats.rx_packets++;
			napi_gro_receive(&vp->napi, skb);
		} else {
			dev_kfree_skb_irq(skb);
		}
	}
	return pkt_len;
}

/*
 * Packet at a time TX which falls back to vector TX if the
 * underlying transport is busy.
 */



static int writev_tx(struct vector_private *vp, struct sk_buff *skb)
{
	struct iovec iov[3 + MAX_IOV_SIZE];
	int iov_count, pkt_len = 0;

	iov[0].iov_base = vp->header_txbuffer;
	iov_count = prep_msg(vp, skb, (struct iovec *) &iov);

	if (iov_count < 1)
		goto drop;

	pkt_len = uml_vector_writev(
		vp->fds->tx_fd,
		(struct iovec *) &iov,
		iov_count
	);

	if (pkt_len < 0)
		goto drop;

	netif_trans_update(vp->dev);
	netif_wake_queue(vp->dev);

	if (pkt_len > 0) {
		vp->dev->stats.tx_bytes += skb->len;
		vp->dev->stats.tx_packets++;
	} else {
		vp->dev->stats.tx_dropped++;
	}
	consume_skb(skb);
	return pkt_len;
drop:
	vp->dev->stats.tx_dropped++;
	consume_skb(skb);
	if (pkt_len < 0)
		vp->in_error = true;
	return pkt_len;
}

/*
 * Receive as many messages as we can in one call using the special
 * mmsg vector matched to an skb vector which we prepared earlier.
 */

static int vector_mmsg_rx(struct vector_private *vp, int budget)
{
	int packet_count, i;
	struct vector_queue *qi = vp->rx_queue;
	struct sk_buff *skb;
	struct mmsghdr *mmsg_vector = qi->mmsg_vector;
	void **skbuff_vector = qi->skbuff_vector;
	int header_check;

	/* Refresh the vector and make sure it is with new skbs and the
	 * iovs are updated to point to them.
	 */

	prep_queue_for_rx(qi);

	/* Fire the Lazy Gun - get as many packets as we can in one go. */

	if (budget > qi->max_depth)
		budget = qi->max_depth;

	packet_count = uml_vector_recvmmsg(
		vp->fds->rx_fd, qi->mmsg_vector, qi->max_depth, 0);

	if (packet_count < 0)
		vp->in_error = true;

	if (packet_count <= 0)
		return packet_count;

	/* We treat packet processing as enqueue, buffer refresh as dequeue
	 * The queue_depth tells us how many buffers have been used and how
	 * many do we need to prep the next time prep_queue_for_rx() is called.
	 */

	qi->queue_depth = packet_count;

	for (i = 0; i < packet_count; i++) {
		skb = (*skbuff_vector);
		if (mmsg_vector->msg_len > vp->header_size) {
			if (vp->header_size > 0) {
				header_check = vp->verify_header(
					mmsg_vector->msg_hdr.msg_iov->iov_base,
					skb,
					vp
				);
				if (header_check < 0) {
					/* Overlay header failed to verify - discard.
					 * We can actually keep this skb and reuse it,
					 * but that will make the prep logic too
					 * complex.
					 */
					dev_kfree_skb_irq(skb);
					vp->estats.rx_encaps_errors++;
					continue;
				}
				if (header_check > 0) {
					vp->estats.rx_csum_offload_good++;
					skb->ip_summed = CHECKSUM_UNNECESSARY;
				}
			}
			pskb_trim(skb,
				mmsg_vector->msg_len - vp->rx_header_size);
			skb->protocol = eth_type_trans(skb, skb->dev);
			/*
			 * We do not need to lock on updating stats here
			 * The interrupt loop is non-reentrant.
			 */
			vp->dev->stats.rx_bytes += skb->len;
			vp->dev->stats.rx_packets++;
			napi_gro_receive(&vp->napi, skb);
		} else {
			/* Overlay header too short to do anything - discard.
			 * We can actually keep this skb and reuse it,
			 * but that will make the prep logic too complex.
			 */
			if (skb != NULL)
				dev_kfree_skb_irq(skb);
		}
		(*skbuff_vector) = NULL;
		/* Move to the next buffer element */
		mmsg_vector++;
		skbuff_vector++;
	}
	if (packet_count > 0) {
		if (vp->estats.rx_queue_max < packet_count)
			vp->estats.rx_queue_max = packet_count;
		vp->estats.rx_queue_running_average =
			(vp->estats.rx_queue_running_average + packet_count) >> 1;
	}
	return packet_count;
}

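/*
 * ndo_start_xmit: in packet-at-a-time mode transmit immediately via
 * writev; in vector mode account the packet for BQL, enqueue it and
 * either wait for more packets (arming the coalescing timer) or flush
 * the queue with sendmmsg right away.
 */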
static int vector_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct vector_private *vp = netdev_priv(dev);
	int queue_depth = 0;

	if (vp->in_error) {
		deactivate_fd(vp->fds->rx_fd, vp->rx_irq);
		if ((vp->fds->rx_fd != vp->fds->tx_fd) && (vp->tx_irq != 0))
			deactivate_fd(vp->fds->tx_fd, vp->tx_irq);
		return NETDEV_TX_BUSY;
	}

	if ((vp->options & VECTOR_TX) == 0) {
		writev_tx(vp, skb);
		return NETDEV_TX_OK;
	}

	/* We do BQL only in the vector path, no point doing it in
	 * packet at a time mode as there is no device queue
	 */

	netdev_sent_queue(vp->dev, skb->len);
	queue_depth = vector_enqueue(vp->tx_queue, skb);

	if (queue_depth < vp->tx_queue->max_depth && netdev_xmit_more()) {
		mod_timer(&vp->tl, vp->coalesce);
		return NETDEV_TX_OK;
	} else {
		queue_depth = vector_send(vp->tx_queue);
		if (queue_depth > 0)
			napi_schedule(&vp->napi);
	}

	return NETDEV_TX_OK;
}

static irqreturn_t vector_rx_interrupt(int irq, void *dev_id)
{
	struct net_device *dev = dev_id;
	struct vector_private *vp = netdev_priv(dev);

	if (!netif_running(dev))
		return IRQ_NONE;
	napi_schedule(&vp->napi);
	return IRQ_HANDLED;

}

static irqreturn_t vector_tx_interrupt(int irq, void *dev_id)
{
	struct net_device *dev = dev_id;
	struct vector_private *vp = netdev_priv(dev);

	if (!netif_running(dev))
		return IRQ_NONE;
	/* We need to pay attention to it only if we got
	 * -EAGAIN or -ENOBUFS from sendmmsg. Otherwise
	 * we ignore it. In the future, it may be worth
	 * it to improve the IRQ controller a bit to make
	 * tweaking the IRQ mask less costly
	 */

	napi_schedule(&vp->napi);
	return IRQ_HANDLED;

}

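/* Round-robin allocation of IRQ numbers for new devices: IRQs are handed
 * out starting at VECTOR_BASE_IRQ and wrap after VECTOR_IRQ_SPACE entries.
 */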
static int irq_rr;

static int vector_net_close(struct net_device *dev)
{
	struct vector_private *vp = netdev_priv(dev);
	unsigned long flags;

	netif_stop_queue(dev);
	del_timer(&vp->tl);

	if (vp->fds == NULL)
		return 0;

	/* Disable and free all IRQS */
	if (vp->rx_irq > 0) {
		um_free_irq(vp->rx_irq, dev);
		vp->rx_irq = 0;
	}
	if (vp->tx_irq > 0) {
		um_free_irq(vp->tx_irq, dev);
		vp->tx_irq = 0;
	}
	napi_disable(&vp->napi);
	netif_napi_del(&vp->napi);
	if (vp->fds->rx_fd > 0) {
		if (vp->bpf)
			uml_vector_detach_bpf(vp->fds->rx_fd, vp->bpf);
		os_close_file(vp->fds->rx_fd);
		vp->fds->rx_fd = -1;
	}
	if (vp->fds->tx_fd > 0) {
		os_close_file(vp->fds->tx_fd);
		vp->fds->tx_fd = -1;
	}
	if (vp->bpf != NULL)
		kfree(vp->bpf->filter);
	kfree(vp->bpf);
	vp->bpf = NULL;
	kfree(vp->fds->remote_addr);
	kfree(vp->transport_data);
	kfree(vp->header_rxbuffer);
	kfree(vp->header_txbuffer);
	if (vp->rx_queue != NULL)
		destroy_queue(vp->rx_queue);
	if (vp->tx_queue != NULL)
		destroy_queue(vp->tx_queue);
	kfree(vp->fds);
	vp->fds = NULL;
	spin_lock_irqsave(&vp->lock, flags);
	vp->opened = false;
	vp->in_error = false;
	spin_unlock_irqrestore(&vp->lock, flags);
	return 0;
}

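/*
 * NAPI poll: flush any queued TX via sendmmsg, then receive up to
 * "budget" packets (vector or legacy path). Reschedule ourselves if
 * we transmitted anything or there may be more RX pending.
 */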
static int vector_poll(struct napi_struct *napi, int budget)
{
	struct vector_private *vp = container_of(napi, struct vector_private, napi);
	int work_done = 0;
	int err;
	bool tx_enqueued = false;

	if ((vp->options & VECTOR_TX) != 0)
		tx_enqueued = (vector_send(vp->tx_queue) > 0);
	if ((vp->options & VECTOR_RX) > 0)
		err = vector_mmsg_rx(vp, budget);
	else {
		err = vector_legacy_rx(vp);
		if (err > 0)
			err = 1;
	}
	if (err > 0)
		work_done += err;

	if (tx_enqueued || err > 0)
		napi_schedule(napi);
	if (work_done < budget)
		napi_complete_done(napi, work_done);
	return work_done;
}

static void vector_reset_tx(struct work_struct *work)
{
	struct vector_private *vp =
		container_of(work, struct vector_private, reset_tx);
	netdev_reset_queue(vp->dev);
	netif_start_queue(vp->dev);
	netif_wake_queue(vp->dev);
}

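/*
 * ndo_open: create the host-side file descriptors and the RX/TX queues
 * according to the parsed options, register the NAPI context and the
 * RX (and optionally TX) IRQs, attach a BPF filter if one is configured
 * and kick off reception.
 */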
static int vector_net_open(struct net_device *dev)
{
	struct vector_private *vp = netdev_priv(dev);
	unsigned long flags;
	int err = -EINVAL;
	struct vector_device *vdevice;

	spin_lock_irqsave(&vp->lock, flags);
	if (vp->opened) {
		spin_unlock_irqrestore(&vp->lock, flags);
		return -ENXIO;
	}
	vp->opened = true;
	spin_unlock_irqrestore(&vp->lock, flags);

	vp->bpf = uml_vector_user_bpf(get_bpf_file(vp->parsed));

	vp->fds = uml_vector_user_open(vp->unit, vp->parsed);

	if (vp->fds == NULL)
		goto out_close;

	if (build_transport_data(vp) < 0)
		goto out_close;

	if ((vp->options & VECTOR_RX) > 0) {
		vp->rx_queue = create_queue(
			vp,
			get_depth(vp->parsed),
			vp->rx_header_size,
			MAX_IOV_SIZE
		);
		vp->rx_queue->queue_depth = get_depth(vp->parsed);
	} else {
		vp->header_rxbuffer = kmalloc(
			vp->rx_header_size,
			GFP_KERNEL
		);
		if (vp->header_rxbuffer == NULL)
			goto out_close;
	}
	if ((vp->options & VECTOR_TX) > 0) {
		vp->tx_queue = create_queue(
			vp,
			get_depth(vp->parsed),
			vp->header_size,
			MAX_IOV_SIZE
		);
	} else {
		vp->header_txbuffer = kmalloc(vp->header_size, GFP_KERNEL);
		if (vp->header_txbuffer == NULL)
			goto out_close;
	}

	netif_napi_add_weight(vp->dev, &vp->napi, vector_poll,
			      get_depth(vp->parsed));
	napi_enable(&vp->napi);

	/* READ IRQ */
	err = um_request_irq(
		irq_rr + VECTOR_BASE_IRQ, vp->fds->rx_fd,
			IRQ_READ, vector_rx_interrupt,
			IRQF_SHARED, dev->name, dev);
	if (err < 0) {
		netdev_err(dev, "vector_open: failed to get rx irq(%d)\n", err);
		err = -ENETUNREACH;
		goto out_close;
	}
	vp->rx_irq = irq_rr + VECTOR_BASE_IRQ;
	dev->irq = irq_rr + VECTOR_BASE_IRQ;
	irq_rr = (irq_rr + 1) % VECTOR_IRQ_SPACE;

	/* WRITE IRQ - we need it only if we have vector TX */
	if ((vp->options & VECTOR_TX) > 0) {
		err = um_request_irq(
			irq_rr + VECTOR_BASE_IRQ, vp->fds->tx_fd,
				IRQ_WRITE, vector_tx_interrupt,
				IRQF_SHARED, dev->name, dev);
		if (err < 0) {
			netdev_err(dev,
				"vector_open: failed to get tx irq(%d)\n", err);
			err = -ENETUNREACH;
			goto out_close;
		}
		vp->tx_irq = irq_rr + VECTOR_BASE_IRQ;
		irq_rr = (irq_rr + 1) % VECTOR_IRQ_SPACE;
	}

	if ((vp->options & VECTOR_QDISC_BYPASS) != 0) {
		if (!uml_raw_enable_qdisc_bypass(vp->fds->rx_fd))
			vp->options |= VECTOR_BPF;
	}
	if (((vp->options & VECTOR_BPF) != 0) && (vp->bpf == NULL))
		vp->bpf = uml_vector_default_bpf(dev->dev_addr);

	if (vp->bpf != NULL)
		uml_vector_attach_bpf(vp->fds->rx_fd, vp->bpf);

	netif_start_queue(dev);
	vector_reset_stats(vp);

	/* clear buffer - it can happen that the host side of the interface
	 * is full when we get here. In this case, new data is never queued,
	 * SIGIOs never arrive, and the net never works.
	 */

	napi_schedule(&vp->napi);

	vdevice = find_device(vp->unit);
	vdevice->opened = 1;

	if ((vp->options & VECTOR_TX) != 0)
		add_timer(&vp->tl);
	return 0;
out_close:
	vector_net_close(dev);
	return err;
}


static void vector_net_set_multicast_list(struct net_device *dev)
{
	/* TODO: - we can do some BPF games here */
	return;
}

static void vector_net_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
	struct vector_private *vp = netdev_priv(dev);

	vp->estats.tx_timeout_count++;
	netif_trans_update(dev);
	schedule_work(&vp->reset_tx);
}

static netdev_features_t vector_fix_features(struct net_device *dev,
	netdev_features_t features)
{
	features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
	return features;
}

static int vector_set_features(struct net_device *dev,
	netdev_features_t features)
{
	struct vector_private *vp = netdev_priv(dev);
	/* Adjust buffer sizes for GSO/GRO. Unfortunately, there is
	 * no way to negotiate it on raw sockets, so we can change
	 * only our side.
	 */
	if (features & NETIF_F_GRO)
		/* All new frame buffers will be GRO-sized */
		vp->req_size = 65536;
	else
		/* All new frame buffers will be normal sized */
		vp->req_size = vp->max_packet + vp->headroom + SAFETY_MARGIN;
	return 0;
}

#ifdef CONFIG_NET_POLL_CONTROLLER
static void vector_net_poll_controller(struct net_device *dev)
{
	disable_irq(dev->irq);
	vector_rx_interrupt(dev->irq, dev);
	enable_irq(dev->irq);
}
#endif

static void vector_net_get_drvinfo(struct net_device *dev,
				   struct ethtool_drvinfo *info)
{
	strscpy(info->driver, DRIVER_NAME);
}

static int vector_net_load_bpf_flash(struct net_device *dev,
				     struct ethtool_flash *efl)
{
	struct vector_private *vp = netdev_priv(dev);
	struct vector_device *vdevice;
	const struct firmware *fw;
	int result = 0;

	if (!(vp->options & VECTOR_BPF_FLASH)) {
		netdev_err(dev, "loading firmware not permitted: %s\n", efl->data);
		return -1;
	}

	spin_lock(&vp->lock);

	if (vp->bpf != NULL) {
		if (vp->opened)
			uml_vector_detach_bpf(vp->fds->rx_fd, vp->bpf);
		kfree(vp->bpf->filter);
		vp->bpf->filter = NULL;
	} else {
		vp->bpf = kmalloc(sizeof(struct sock_fprog), GFP_ATOMIC);
		if (vp->bpf == NULL) {
			netdev_err(dev, "failed to allocate memory for firmware\n");
			goto flash_fail;
		}
	}

	vdevice = find_device(vp->unit);

	if (request_firmware(&fw, efl->data, &vdevice->pdev.dev))
		goto flash_fail;

	vp->bpf->filter = kmemdup(fw->data, fw->size, GFP_ATOMIC);
	if (!vp->bpf->filter)
		goto free_buffer;

	vp->bpf->len = fw->size / sizeof(struct sock_filter);
	release_firmware(fw);

	if (vp->opened)
		result = uml_vector_attach_bpf(vp->fds->rx_fd, vp->bpf);

	spin_unlock(&vp->lock);

	return result;

free_buffer:
	release_firmware(fw);

flash_fail:
	spin_unlock(&vp->lock);
	if (vp->bpf != NULL)
		kfree(vp->bpf->filter);
	kfree(vp->bpf);
	vp->bpf = NULL;
	return -1;
}

static void vector_get_ringparam(struct net_device *netdev,
				 struct ethtool_ringparam *ring,
				 struct kernel_ethtool_ringparam *kernel_ring,
				 struct netlink_ext_ack *extack)
{
	struct vector_private *vp = netdev_priv(netdev);

	ring->rx_max_pending = vp->rx_queue->max_depth;
	ring->tx_max_pending = vp->tx_queue->max_depth;
	ring->rx_pending = vp->rx_queue->max_depth;
	ring->tx_pending = vp->tx_queue->max_depth;
}

static void vector_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
{
	switch (stringset) {
	case ETH_SS_TEST:
		*buf = '\0';
		break;
	case ETH_SS_STATS:
		memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
		break;
	default:
		WARN_ON(1);
		break;
	}
}

static int vector_get_sset_count(struct net_device *dev, int sset)
{
	switch (sset) {
	case ETH_SS_TEST:
		return 0;
	case ETH_SS_STATS:
		return VECTOR_NUM_STATS;
	default:
		return -EOPNOTSUPP;
	}
}

static void vector_get_ethtool_stats(struct net_device *dev,
	struct ethtool_stats *estats,
	u64 *tmp_stats)
{
	struct vector_private *vp = netdev_priv(dev);

	memcpy(tmp_stats, &vp->estats, sizeof(struct vector_estats));
}

static int vector_get_coalesce(struct net_device *netdev,
			       struct ethtool_coalesce *ec,
			       struct kernel_ethtool_coalesce *kernel_coal,
			       struct netlink_ext_ack *extack)
{
	struct vector_private *vp = netdev_priv(netdev);

	ec->tx_coalesce_usecs = (vp->coalesce * 1000000) / HZ;
	return 0;
}

static int vector_set_coalesce(struct net_device *netdev,
			       struct ethtool_coalesce *ec,
			       struct kernel_ethtool_coalesce *kernel_coal,
			       struct netlink_ext_ack *extack)
{
	struct vector_private *vp = netdev_priv(netdev);

	vp->coalesce = (ec->tx_coalesce_usecs * HZ) / 1000000;
	if (vp->coalesce == 0)
		vp->coalesce = 1;
	return 0;
}

static const struct ethtool_ops vector_net_ethtool_ops = {
	.supported_coalesce_params = ETHTOOL_COALESCE_TX_USECS,
	.get_drvinfo = vector_net_get_drvinfo,
	.get_link = ethtool_op_get_link,
	.get_ts_info = ethtool_op_get_ts_info,
	.get_ringparam = vector_get_ringparam,
	.get_strings = vector_get_strings,
	.get_sset_count = vector_get_sset_count,
	.get_ethtool_stats = vector_get_ethtool_stats,
	.get_coalesce = vector_get_coalesce,
	.set_coalesce = vector_set_coalesce,
	.flash_device = vector_net_load_bpf_flash,
};


static const struct net_device_ops vector_netdev_ops = {
	.ndo_open = vector_net_open,
	.ndo_stop = vector_net_close,
	.ndo_start_xmit = vector_net_start_xmit,
	.ndo_set_rx_mode = vector_net_set_multicast_list,
	.ndo_tx_timeout = vector_net_tx_timeout,
	.ndo_set_mac_address = eth_mac_addr,
	.ndo_validate_addr = eth_validate_addr,
	.ndo_fix_features = vector_fix_features,
	.ndo_set_features = vector_set_features,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller = vector_net_poll_controller,
#endif
};

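/* TX coalescing timer - kick the NAPI context so the queued TX is flushed */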
static void vector_timer_expire(struct timer_list *t)
{
	struct vector_private *vp = from_timer(vp, t, tl);

	vp->estats.tx_kicks++;
	napi_schedule(&vp->napi);
}


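/*
 * Allocate and register a vector ethernet device for unit "n" with the
 * parsed option list "def": set up the platform device, the netdev and
 * the vector_private defaults, then add the device to the global list.
 */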
static void vector_eth_configure(
		int n,
		struct arglist *def
	)
{
	struct vector_device *device;
	struct net_device *dev;
	struct vector_private *vp;
	int err;

	device = kzalloc(sizeof(*device), GFP_KERNEL);
	if (device == NULL) {
		printk(KERN_ERR "eth_configure failed to allocate struct "
				 "vector_device\n");
		return;
	}
	dev = alloc_etherdev(sizeof(struct vector_private));
	if (dev == NULL) {
		printk(KERN_ERR "eth_configure: failed to allocate struct "
				 "net_device for vec%d\n", n);
		goto out_free_device;
	}

	dev->mtu = get_mtu(def);

	INIT_LIST_HEAD(&device->list);
	device->unit = n;

	/* If this name ends up conflicting with an existing registered
	 * netdevice, that is OK, register_netdev{,ice}() will notice this
	 * and fail.
	 */
	snprintf(dev->name, sizeof(dev->name), "vec%d", n);
	uml_net_setup_etheraddr(dev, uml_vector_fetch_arg(def, "mac"));
	vp = netdev_priv(dev);

	/* sysfs register */
	if (!driver_registered) {
		platform_driver_register(&uml_net_driver);
		driver_registered = 1;
	}
	device->pdev.id = n;
	device->pdev.name = DRIVER_NAME;
	device->pdev.dev.release = vector_device_release;
	dev_set_drvdata(&device->pdev.dev, device);
	if (platform_device_register(&device->pdev))
		goto out_free_netdev;
	SET_NETDEV_DEV(dev, &device->pdev.dev);

	device->dev = dev;

	*vp = ((struct vector_private)
		{
		.list = LIST_HEAD_INIT(vp->list),
		.dev = dev,
		.unit = n,
		.options = get_transport_options(def),
		.rx_irq = 0,
		.tx_irq = 0,
		.parsed = def,
		.max_packet = get_mtu(def) + ETH_HEADER_OTHER,
		/* TODO - we need to calculate headroom so that ip header
		 * is 16 byte aligned all the time
		 */
		.headroom = get_headroom(def),
		.form_header = NULL,
		.verify_header = NULL,
		.header_rxbuffer = NULL,
		.header_txbuffer = NULL,
		.header_size = 0,
		.rx_header_size = 0,
		.rexmit_scheduled = false,
		.opened = false,
		.transport_data = NULL,
		.in_write_poll = false,
		.coalesce = 2,
		.req_size = get_req_size(def),
		.in_error = false,
		.bpf = NULL
	});

	dev->features = dev->hw_features = (NETIF_F_SG | NETIF_F_FRAGLIST);
	INIT_WORK(&vp->reset_tx, vector_reset_tx);

	timer_setup(&vp->tl, vector_timer_expire, 0);
	spin_lock_init(&vp->lock);

	/* FIXME */
	dev->netdev_ops = &vector_netdev_ops;
	dev->ethtool_ops = &vector_net_ethtool_ops;
	dev->watchdog_timeo = (HZ >> 1);
	/* primary IRQ - fixme */
	dev->irq = 0; /* we will adjust this once opened */

	rtnl_lock();
	err = register_netdevice(dev);
	rtnl_unlock();
	if (err)
		goto out_undo_user_init;

	spin_lock(&vector_devices_lock);
	list_add(&device->list, &vector_devices);
	spin_unlock(&vector_devices_lock);

	return;

out_undo_user_init:
	return;
out_free_netdev:
	free_netdev(dev);
out_free_device:
	kfree(device);
}



/*
 * Invoked late in the init
 */

static int __init vector_init(void)
{
	struct list_head *ele;
	struct vector_cmd_line_arg *def;
	struct arglist *parsed;

	list_for_each(ele, &vec_cmd_line) {
		def = list_entry(ele, struct vector_cmd_line_arg, list);
		parsed = uml_parse_vector_ifspec(def->arguments);
		if (parsed != NULL)
			vector_eth_configure(def->unit, parsed);
	}
	return 0;
}


/* Invoked at initial argument parsing, only stores
 * arguments until a proper vector_init is called
 * later
 */

static int __init vector_setup(char *str)
{
	char *error;
	int n, err;
	struct vector_cmd_line_arg *new;

	err = vector_parse(str, &n, &str, &error);
	if (err) {
		printk(KERN_ERR "vector_setup - Couldn't parse '%s' : %s\n",
				 str, error);
		return 1;
	}
	new = memblock_alloc(sizeof(*new), SMP_CACHE_BYTES);
	if (!new)
		panic("%s: Failed to allocate %zu bytes\n", __func__,
		      sizeof(*new));
	INIT_LIST_HEAD(&new->list);
	new->unit = n;
	new->arguments = str;
	list_add_tail(&new->list, &vec_cmd_line);
	return 1;
}

__setup("vec", vector_setup);
__uml_help(vector_setup,
"vec[0-9]+:<option>=<value>,<option>=<value>\n"
"	Configure a vector io network device.\n\n"
);

late_initcall(vector_init);

static struct mc_device vector_mc = {
	.list = LIST_HEAD_INIT(vector_mc.list),
	.name = "vec",
	.config = vector_config,
	.get_config = NULL,
	.id = vector_id,
	.remove = vector_remove,
};

#ifdef CONFIG_INET
static int vector_inetaddr_event(
	struct notifier_block *this,
	unsigned long event,
	void *ptr)
{
	return NOTIFY_DONE;
}

static struct notifier_block vector_inetaddr_notifier = {
	.notifier_call = vector_inetaddr_event,
};

static void inet_register(void)
{
	register_inetaddr_notifier(&vector_inetaddr_notifier);
}
#else
static inline void inet_register(void)
{
}
#endif

static int vector_net_init(void)
{
	mconsole_register_dev(&vector_mc);
	inet_register();
	return 0;
}

__initcall(vector_net_init);