netback.c source code [linux/drivers/net/xen-netback/netback.c]

/*
 * Back-end of the driver for virtual network devices. This portion of the
 * driver exports a 'unified' network-device interface that can be accessed
 * by any operating system that implements a compatible front end. A
 * reference front-end implementation can be found in:
 *  drivers/net/xen-netfront.c
 *
 * Copyright (c) 2002-2005, K A Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
34
35	#include "common.h"
36
37	#include <linux/kthread.h>
38	#include <linux/if_vlan.h>
39	#include <linux/udp.h>
40	#include <linux/highmem.h>
41
42	#include <net/tcp.h>
43
44	#include <xen/xen.h>
45	#include <xen/events.h>
46	#include <xen/interface/memory.h>
47	#include <xen/page.h>
48
49	#include <asm/xen/hypercall.h>
50
51	/ Provide an option to disable split event channels at load time as*
52	* event channels are limited resource. Split event channels are
53	* enabled by default.
54	*/
55	bool separate_tx_rx_irq = true;
56	module_param(separate_tx_rx_irq, bool, `0644`);
57
58	/ The time that packets can stay on the guest Rx internal queue*
59	* before they are dropped.
60	*/
61	unsigned int rx_drain_timeout_msecs = `10000`;
62	module_param(rx_drain_timeout_msecs, uint, `0444`);
63
64	/ The length of time before the frontend is considered unresponsive*
65	* because it isn't providing Rx slots.
66	*/
67	unsigned int rx_stall_timeout_msecs = `60000`;
68	module_param(rx_stall_timeout_msecs, uint, `0444`);
69
70	#define MAX_QUEUES_DEFAULT 8
71	unsigned int xenvif_max_queues;
72	module_param_named(max_queues, xenvif_max_queues, uint, `0644`);
73	MODULE_PARM_DESC(max_queues,
74	"Maximum number of queues per virtual interface");
75
76	/*
77	* This is the maximum slots a skb can have. If a guest sends a skb
78	* which exceeds this limit it is considered malicious.
79	*/
80	#define FATAL_SKB_SLOTS_DEFAULT 20
81	static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT;
82	module_param(fatal_skb_slots, uint, `0444`);
83
84	/ The amount to copy out of the first guest Tx slot into the skb's*
85	* linear area. If the first slot has more data, it will be mapped
86	* and put into the first frag.
87	*
88	* This is sized to avoid pulling headers from the frags for most
89	* TCP/IP packets.
90	*/
91	#define XEN_NETBACK_TX_COPY_LEN 128
92
93	/ This is the maximum number of flows in the hash cache. /
94	#define XENVIF_HASH_CACHE_SIZE_DEFAULT 64
95	unsigned int xenvif_hash_cache_size = XENVIF_HASH_CACHE_SIZE_DEFAULT;
96	module_param_named(hash_cache_size, xenvif_hash_cache_size, uint, `0644`);
97	MODULE_PARM_DESC(hash_cache_size, "Number of flows in the hash cache");
98
99	/ The module parameter tells that we have to put data*
100	* for xen-netfront with the XDP_PACKET_HEADROOM offset
101	* needed for XDP processing
102	*/
103	bool provides_xdp_headroom = true;
104	module_param(provides_xdp_headroom, bool, `0644`);
105
106	static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
107	s8 status);
108
109	static void make_tx_response(struct xenvif_queue *queue,
110	const struct xen_netif_tx_request *txp,
111	unsigned int extra_count,
112	s8 status);
113
114	static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx);
115
116	static inline int tx_work_todo(struct xenvif_queue *queue);
117
118	static inline unsigned long idx_to_pfn(struct xenvif_queue *queue,
119	u16 idx)
120	{
121	return page_to_pfn(queue->mmap_pages[idx]);
122	}
123
124	static inline unsigned long idx_to_kaddr(struct xenvif_queue *queue,
125	u16 idx)
126	{
127	return (unsigned long)pfn_to_kaddr(pfn: idx_to_pfn(queue, idx));
128	}
129
/* Lvalue for the callback_struct stored in pending_tx_info[pending_idx].
 * The first argument is named "vif" but callers in this file pass the
 * queue pointer; it is parenthesised so any pointer expression works.
 */
#define callback_param(vif, pending_idx) \
	((vif)->pending_tx_info[pending_idx].callback_struct)
132
133	/ Find the containing VIF's structure from a pointer in pending_tx_info array*
134	*/
135	static inline struct xenvif_queue ubuf_to_queue(const* struct ubuf_info_msgzc *ubuf)
136	{
137	u16 pending_idx = ubuf->desc;
138	struct pending_tx_info *temp =
139	container_of(ubuf, struct pending_tx_info, callback_struct);
140	return container_of(temp - pending_idx,
141	struct xenvif_queue,
142	pending_tx_info[`0`]);
143	}
144
145	static u16 frag_get_pending_idx(skb_frag_t *frag)
146	{
147	return (u16)skb_frag_off(frag);
148	}
149
150	static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx)
151	{
152	skb_frag_off_set(frag, offset: pending_idx);
153	}
154
155	static inline pending_ring_idx_t pending_index(unsigned i)
156	{
157	return i & (MAX_PENDING_REQS-`1`);
158	}
159
160	void xenvif_kick_thread(struct xenvif_queue *queue)
161	{
162	wake_up(&queue->wq);
163	}
164
165	void xenvif_napi_schedule_or_enable_events(struct xenvif_queue *queue)
166	{
167	int more_to_do;
168
169	RING_FINAL_CHECK_FOR_REQUESTS(&queue->tx, more_to_do);
170
171	if (more_to_do)
172	napi_schedule(n: &queue->napi);
173	else if (atomic_fetch_andnot(NETBK_TX_EOI \| NETBK_COMMON_EOI,
174	v: &queue->eoi_pending) &
175	(NETBK_TX_EOI \| NETBK_COMMON_EOI))
176	xen_irq_lateeoi(irq: queue->tx_irq, eoi_flags: `0`);
177	}
178
179	static void tx_add_credit(struct xenvif_queue *queue)
180	{
181	unsigned long max_burst, max_credit;
182
183	/*
184	* Allow a burst big enough to transmit a jumbo packet of up to 128kB.
185	* Otherwise the interface can seize up due to insufficient credit.
186	*/
187	max_burst = max(`131072UL`, queue->credit_bytes);
188
189	/ Take care that adding a new chunk of credit doesn't wrap to zero. /
190	max_credit = queue->remaining_credit + queue->credit_bytes;
191	if (max_credit < queue->remaining_credit)
192	max_credit = ULONG_MAX; / wrapped: clamp to ULONG_MAX /
193
194	queue->remaining_credit = min(max_credit, max_burst);
195	queue->rate_limited = false;
196	}
197
198	void xenvif_tx_credit_callback(struct timer_list *t)
199	{
200	struct xenvif_queue *queue = from_timer(queue, t, credit_timeout);
201	tx_add_credit(queue);
202	xenvif_napi_schedule_or_enable_events(queue);
203	}
204
205	static void xenvif_tx_err(struct xenvif_queue *queue,
206	struct xen_netif_tx_request *txp,
207	unsigned int extra_count, RING_IDX end)
208	{
209	RING_IDX cons = queue->tx.req_cons;
210
211	do {
212	make_tx_response(queue, txp, extra_count, XEN_NETIF_RSP_ERROR);
213	if (cons == end)
214	break;
215	RING_COPY_REQUEST(&queue->tx, cons++, txp);
216	extra_count = `0`; / only the first frag can have extras /
217	} while (`1`);
218	queue->tx.req_cons = cons;
219	}
220
221	static void xenvif_fatal_tx_err(struct xenvif *vif)
222	{
223	netdev_err(dev: vif->dev, format: "fatal error; disabling device\n");
224	vif->disabled = true;
225	/ Disable the vif from queue 0's kthread /
226	if (vif->num_queues)
227	xenvif_kick_thread(queue: &vif->queues[`0`]);
228	}
229
230	static int xenvif_count_requests(struct xenvif_queue *queue,
231	struct xen_netif_tx_request *first,
232	unsigned int extra_count,
233	struct xen_netif_tx_request *txp,
234	int work_to_do)
235	{
236	RING_IDX cons = queue->tx.req_cons;
237	int slots = `0`;
238	int drop_err = `0`;
239	int more_data;
240
241	if (!(first->flags & XEN_NETTXF_more_data))
242	return `0`;
243
244	do {
245	struct xen_netif_tx_request dropped_tx = { `0` };
246
247	if (slots >= work_to_do) {
248	netdev_err(dev: queue->vif->dev,
249	format: "Asked for %d slots but exceeds this limit\n",
250	work_to_do);
251	xenvif_fatal_tx_err(vif: queue->vif);
252	return -ENODATA;
253	}
254
255	/ This guest is really using too many slots and*
256	* considered malicious.
257	*/
258	if (unlikely(slots >= fatal_skb_slots)) {
259	netdev_err(dev: queue->vif->dev,
260	format: "Malicious frontend using %d slots, threshold %u\n",
261	slots, fatal_skb_slots);
262	xenvif_fatal_tx_err(vif: queue->vif);
263	return -E2BIG;
264	}
265
266	/ Xen network protocol had implicit dependency on*
267	* MAX_SKB_FRAGS. XEN_NETBK_LEGACY_SLOTS_MAX is set to
268	* the historical MAX_SKB_FRAGS value 18 to honor the
269	* same behavior as before. Any packet using more than
270	* 18 slots but less than fatal_skb_slots slots is
271	* dropped
272	*/
273	if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) {
274	if (net_ratelimit())
275	netdev_dbg(queue->vif->dev,
276	"Too many slots (%d) exceeding limit (%d), dropping packet\n",
277	slots, XEN_NETBK_LEGACY_SLOTS_MAX);
278	drop_err = -E2BIG;
279	}
280
281	if (drop_err)
282	txp = &dropped_tx;
283
284	RING_COPY_REQUEST(&queue->tx, cons + slots, txp);
285
286	/ If the guest submitted a frame >= 64 KiB then*
287	* first->size overflowed and following slots will
288	* appear to be larger than the frame.
289	*
290	* This cannot be fatal error as there are buggy
291	* frontends that do this.
292	*
293	* Consume all slots and drop the packet.
294	*/
295	if (!drop_err && txp->size > first->size) {
296	if (net_ratelimit())
297	netdev_dbg(queue->vif->dev,
298	"Invalid tx request, slot size %u > remaining size %u\n",
299	txp->size, first->size);
300	drop_err = -EIO;
301	}
302
303	first->size -= txp->size;
304	slots++;
305
306	if (unlikely((txp->offset + txp->size) > XEN_PAGE_SIZE)) {
307	netdev_err(dev: queue->vif->dev, format: "Cross page boundary, txp->offset: %u, size: %u\n",
308	txp->offset, txp->size);
309	xenvif_fatal_tx_err(vif: queue->vif);
310	return -EINVAL;
311	}
312
313	more_data = txp->flags & XEN_NETTXF_more_data;
314
315	if (!drop_err)
316	txp++;
317
318	} while (more_data);
319
320	if (drop_err) {
321	xenvif_tx_err(queue, txp: first, extra_count, end: cons + slots);
322	return drop_err;
323	}
324
325	return slots;
326	}
327
328
329	struct xenvif_tx_cb {
330	u16 copy_pending_idx[XEN_NETBK_LEGACY_SLOTS_MAX + `1`];
331	u8 copy_count;
332	u32 split_mask;
333	};
334
335	#define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
336	#define copy_pending_idx(skb, i) (XENVIF_TX_CB(skb)->copy_pending_idx[i])
337	#define copy_count(skb) (XENVIF_TX_CB(skb)->copy_count)
338
339	static inline void xenvif_tx_create_map_op(struct xenvif_queue *queue,
340	u16 pending_idx,
341	struct xen_netif_tx_request *txp,
342	unsigned int extra_count,
343	struct gnttab_map_grant_ref *mop)
344	{
345	queue->pages_to_map[mop-queue->tx_map_ops] = queue->mmap_pages[pending_idx];
346	gnttab_set_map_op(map: mop, addr: idx_to_kaddr(queue, idx: pending_idx),
347	GNTMAP_host_map \| GNTMAP_readonly,
348	ref: txp->gref, domid: queue->vif->domid);
349
350	memcpy(&queue->pending_tx_info[pending_idx].req, txp,
351	sizeof(*txp));
352	queue->pending_tx_info[pending_idx].extra_count = extra_count;
353	}
354
355	static inline struct sk_buff xenvif_alloc_skb(unsigned* int size)
356	{
357	struct sk_buff *skb =
358	alloc_skb(size: size + NET_SKB_PAD + NET_IP_ALIGN,
359	GFP_ATOMIC \| __GFP_NOWARN);
360
361	BUILD_BUG_ON(sizeof(XENVIF_TX_CB(skb)) > sizeof*(skb->cb));
362	if (unlikely(skb == NULL))
363	return NULL;
364
365	/ Packets passed to netif_rx() must have some headroom. /
366	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
367
368	/ Initialize it here to avoid later surprises /
369	skb_shinfo(skb)->destructor_arg = NULL;
370
371	return skb;
372	}
373
374	static void xenvif_get_requests(struct xenvif_queue *queue,
375	struct sk_buff *skb,
376	struct xen_netif_tx_request *first,
377	struct xen_netif_tx_request *txfrags,
378	unsigned *copy_ops,
379	unsigned *map_ops,
380	unsigned int frag_overflow,
381	struct sk_buff *nskb,
382	unsigned int extra_count,
383	unsigned int data_len)
384	{
385	struct skb_shared_info *shinfo = skb_shinfo(skb);
386	skb_frag_t *frags = shinfo->frags;
387	u16 pending_idx;
388	pending_ring_idx_t index;
389	unsigned int nr_slots;
390	struct gnttab_copy cop = queue->tx_copy_ops + copy_ops;
391	struct gnttab_map_grant_ref gop = queue->tx_map_ops + map_ops;
392	struct xen_netif_tx_request *txp = first;
393
394	nr_slots = shinfo->nr_frags + frag_overflow + `1`;
395
396	copy_count(skb) = `0`;
397	XENVIF_TX_CB(skb)->split_mask = `0`;
398
399	/ Create copy ops for exactly data_len bytes into the skb head. /
400	__skb_put(skb, len: data_len);
401	while (data_len > `0`) {
402	int amount = data_len > txp->size ? txp->size : data_len;
403	bool split = false;
404
405	cop->source.u.ref = txp->gref;
406	cop->source.domid = queue->vif->domid;
407	cop->source.offset = txp->offset;
408
409	cop->dest.domid = DOMID_SELF;
410	cop->dest.offset = (offset_in_page(skb->data +
411	skb_headlen(skb) -
412	data_len)) & ~XEN_PAGE_MASK;
413	cop->dest.u.gmfn = virt_to_gfn(skb->data + skb_headlen(skb)
414	- data_len);
415
416	/ Don't cross local page boundary! /
417	if (cop->dest.offset + amount > XEN_PAGE_SIZE) {
418	amount = XEN_PAGE_SIZE - cop->dest.offset;
419	XENVIF_TX_CB(skb)->split_mask \|= `1U` << copy_count(skb);
420	split = true;
421	}
422
423	cop->len = amount;
424	cop->flags = GNTCOPY_source_gref;
425
426	index = pending_index(i: queue->pending_cons);
427	pending_idx = queue->pending_ring[index];
428	callback_param(queue, pending_idx).ctx = NULL;
429	copy_pending_idx(skb, copy_count(skb)) = pending_idx;
430	if (!split)
431	copy_count(skb)++;
432
433	cop++;
434	data_len -= amount;
435
436	if (amount == txp->size) {
437	/ The copy op covered the full tx_request /
438
439	memcpy(&queue->pending_tx_info[pending_idx].req,
440	txp, sizeof(*txp));
441	queue->pending_tx_info[pending_idx].extra_count =
442	(txp == first) ? extra_count : `0`;
443
444	if (txp == first)
445	txp = txfrags;
446	else
447	txp++;
448	queue->pending_cons++;
449	nr_slots--;
450	} else {
451	/ The copy op partially covered the tx_request.*
452	* The remainder will be mapped or copied in the next
453	* iteration.
454	*/
455	txp->offset += amount;
456	txp->size -= amount;
457	}
458	}
459
460	for (shinfo->nr_frags = `0`; nr_slots > `0` && shinfo->nr_frags < MAX_SKB_FRAGS;
461	nr_slots--) {
462	if (unlikely(!txp->size)) {
463	make_tx_response(queue, txp, extra_count: `0`, XEN_NETIF_RSP_OKAY);
464	++txp;
465	continue;
466	}
467
468	index = pending_index(i: queue->pending_cons++);
469	pending_idx = queue->pending_ring[index];
470	xenvif_tx_create_map_op(queue, pending_idx, txp,
471	extra_count: txp == first ? extra_count : `0`, mop: gop);
472	frag_set_pending_idx(frag: &frags[shinfo->nr_frags], pending_idx);
473	++shinfo->nr_frags;
474	++gop;
475
476	if (txp == first)
477	txp = txfrags;
478	else
479	txp++;
480	}
481
482	if (nr_slots > `0`) {
483
484	shinfo = skb_shinfo(nskb);
485	frags = shinfo->frags;
486
487	for (shinfo->nr_frags = `0`; shinfo->nr_frags < nr_slots; ++txp) {
488	if (unlikely(!txp->size)) {
489	make_tx_response(queue, txp, extra_count: `0`,
490	XEN_NETIF_RSP_OKAY);
491	continue;
492	}
493
494	index = pending_index(i: queue->pending_cons++);
495	pending_idx = queue->pending_ring[index];
496	xenvif_tx_create_map_op(queue, pending_idx, txp, extra_count: `0`,
497	mop: gop);
498	frag_set_pending_idx(frag: &frags[shinfo->nr_frags],
499	pending_idx);
500	++shinfo->nr_frags;
501	++gop;
502	}
503
504	if (shinfo->nr_frags) {
505	skb_shinfo(skb)->frag_list = nskb;
506	nskb = NULL;
507	}
508	}
509
510	if (nskb) {
511	/ A frag_list skb was allocated but it is no longer needed*
512	* because enough slots were converted to copy ops above or some
513	* were empty.
514	*/
515	kfree_skb(skb: nskb);
516	}
517
518	(*copy_ops) = cop - queue->tx_copy_ops;
519	(*map_ops) = gop - queue->tx_map_ops;
520	}
521
522	static inline void xenvif_grant_handle_set(struct xenvif_queue *queue,
523	u16 pending_idx,
524	grant_handle_t handle)
525	{
526	if (unlikely(queue->grant_tx_handle[pending_idx] !=
527	NETBACK_INVALID_HANDLE)) {
528	netdev_err(dev: queue->vif->dev,
529	format: "Trying to overwrite active handle! pending_idx: 0x%x\n",
530	pending_idx);
531	BUG();
532	}
533	queue->grant_tx_handle[pending_idx] = handle;
534	}
535
536	static inline void xenvif_grant_handle_reset(struct xenvif_queue *queue,
537	u16 pending_idx)
538	{
539	if (unlikely(queue->grant_tx_handle[pending_idx] ==
540	NETBACK_INVALID_HANDLE)) {
541	netdev_err(dev: queue->vif->dev,
542	format: "Trying to unmap invalid handle! pending_idx: 0x%x\n",
543	pending_idx);
544	BUG();
545	}
546	queue->grant_tx_handle[pending_idx] = NETBACK_INVALID_HANDLE;
547	}
548
549	static int xenvif_tx_check_gop(struct xenvif_queue *queue,
550	struct sk_buff *skb,
551	struct gnttab_map_grant_ref **gopp_map,
552	struct gnttab_copy **gopp_copy)
553	{
554	struct gnttab_map_grant_ref gop_map = gopp_map;
555	u16 pending_idx;
556	/ This always points to the shinfo of the skb being checked, which*
557	* could be either the first or the one on the frag_list
558	*/
559	struct skb_shared_info *shinfo = skb_shinfo(skb);
560	/ If this is non-NULL, we are currently checking the frag_list skb, and*
561	* this points to the shinfo of the first one
562	*/
563	struct skb_shared_info *first_shinfo = NULL;
564	int nr_frags = shinfo->nr_frags;
565	const bool sharedslot = nr_frags &&
566	frag_get_pending_idx(frag: &shinfo->frags[`0`]) ==
567	copy_pending_idx(skb, copy_count(skb) - `1`);
568	int i, err = `0`;
569
570	for (i = `0`; i < copy_count(skb); i++) {
571	int newerr;
572
573	/ Check status of header. /
574	pending_idx = copy_pending_idx(skb, i);
575
576	newerr = (*gopp_copy)->status;
577
578	/ Split copies need to be handled together. /
579	if (XENVIF_TX_CB(skb)->split_mask & (`1U` << i)) {
580	(*gopp_copy)++;
581	if (!newerr)
582	newerr = (*gopp_copy)->status;
583	}
584	if (likely(!newerr)) {
585	/ The first frag might still have this slot mapped /
586	if (i < copy_count(skb) - `1` \|\| !sharedslot)
587	xenvif_idx_release(queue, pending_idx,
588	XEN_NETIF_RSP_OKAY);
589	} else {
590	err = newerr;
591	if (net_ratelimit())
592	netdev_dbg(queue->vif->dev,
593	"Grant copy of header failed! status: %d pending_idx: %u ref: %u\n",
594	(*gopp_copy)->status,
595	pending_idx,
596	(*gopp_copy)->source.u.ref);
597	/ The first frag might still have this slot mapped /
598	if (i < copy_count(skb) - `1` \|\| !sharedslot)
599	xenvif_idx_release(queue, pending_idx,
600	XEN_NETIF_RSP_ERROR);
601	}
602	(*gopp_copy)++;
603	}
604
605	check_frags:
606	for (i = `0`; i < nr_frags; i++, gop_map++) {
607	int j, newerr;
608
609	pending_idx = frag_get_pending_idx(frag: &shinfo->frags[i]);
610
611	/ Check error status: if okay then remember grant handle. /
612	newerr = gop_map->status;
613
614	if (likely(!newerr)) {
615	xenvif_grant_handle_set(queue,
616	pending_idx,
617	handle: gop_map->handle);
618	/ Had a previous error? Invalidate this fragment. /
619	if (unlikely(err)) {
620	xenvif_idx_unmap(queue, pending_idx);
621	/ If the mapping of the first frag was OK, but*
622	* the header's copy failed, and they are
623	* sharing a slot, send an error
624	*/
625	if (i == `0` && !first_shinfo && sharedslot)
626	xenvif_idx_release(queue, pending_idx,
627	XEN_NETIF_RSP_ERROR);
628	else
629	xenvif_idx_release(queue, pending_idx,
630	XEN_NETIF_RSP_OKAY);
631	}
632	continue;
633	}
634
635	/ Error on this fragment: respond to client with an error. /
636	if (net_ratelimit())
637	netdev_dbg(queue->vif->dev,
638	"Grant map of %d. frag failed! status: %d pending_idx: %u ref: %u\n",
639	i,
640	gop_map->status,
641	pending_idx,
642	gop_map->ref);
643
644	xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_ERROR);
645
646	/ Not the first error? Preceding frags already invalidated. /
647	if (err)
648	continue;
649
650	/ Invalidate preceding fragments of this skb. /
651	for (j = `0`; j < i; j++) {
652	pending_idx = frag_get_pending_idx(frag: &shinfo->frags[j]);
653	xenvif_idx_unmap(queue, pending_idx);
654	xenvif_idx_release(queue, pending_idx,
655	XEN_NETIF_RSP_OKAY);
656	}
657
658	/ And if we found the error while checking the frag_list, unmap*
659	* the first skb's frags
660	*/
661	if (first_shinfo) {
662	for (j = `0`; j < first_shinfo->nr_frags; j++) {
663	pending_idx = frag_get_pending_idx(frag: &first_shinfo->frags[j]);
664	xenvif_idx_unmap(queue, pending_idx);
665	xenvif_idx_release(queue, pending_idx,
666	XEN_NETIF_RSP_OKAY);
667	}
668	}
669
670	/ Remember the error: invalidate all subsequent fragments. /
671	err = newerr;
672	}
673
674	if (skb_has_frag_list(skb) && !first_shinfo) {
675	first_shinfo = shinfo;
676	shinfo = skb_shinfo(shinfo->frag_list);
677	nr_frags = shinfo->nr_frags;
678
679	goto check_frags;
680	}
681
682	*gopp_map = gop_map;
683	return err;
684	}
685
686	static void xenvif_fill_frags(struct xenvif_queue queue, struct* sk_buff *skb)
687	{
688	struct skb_shared_info *shinfo = skb_shinfo(skb);
689	int nr_frags = shinfo->nr_frags;
690	int i;
691	u16 prev_pending_idx = INVALID_PENDING_IDX;
692
693	for (i = `0`; i < nr_frags; i++) {
694	skb_frag_t *frag = shinfo->frags + i;
695	struct xen_netif_tx_request *txp;
696	struct page *page;
697	u16 pending_idx;
698
699	pending_idx = frag_get_pending_idx(frag);
700
701	/ If this is not the first frag, chain it to the previous/
702	if (prev_pending_idx == INVALID_PENDING_IDX)
703	skb_shinfo(skb)->destructor_arg =
704	&callback_param(queue, pending_idx);
705	else
706	callback_param(queue, prev_pending_idx).ctx =
707	&callback_param(queue, pending_idx);
708
709	callback_param(queue, pending_idx).ctx = NULL;
710	prev_pending_idx = pending_idx;
711
712	txp = &queue->pending_tx_info[pending_idx].req;
713	page = virt_to_page((void *)idx_to_kaddr(queue, pending_idx));
714	__skb_fill_page_desc(skb, i, page, off: txp->offset, size: txp->size);
715	skb->len += txp->size;
716	skb->data_len += txp->size;
717	skb->truesize += txp->size;
718
719	/ Take an extra reference to offset network stack's put_page /
720	get_page(page: queue->mmap_pages[pending_idx]);
721	}
722	}
723
724	static int xenvif_get_extras(struct xenvif_queue *queue,
725	struct xen_netif_extra_info *extras,
726	unsigned int *extra_count,
727	int work_to_do)
728	{
729	struct xen_netif_extra_info extra;
730	RING_IDX cons = queue->tx.req_cons;
731
732	do {
733	if (unlikely(work_to_do-- <= `0`)) {
734	netdev_err(dev: queue->vif->dev, format: "Missing extra info\n");
735	xenvif_fatal_tx_err(vif: queue->vif);
736	return -EBADR;
737	}
738
739	RING_COPY_REQUEST(&queue->tx, cons, &extra);
740
741	queue->tx.req_cons = ++cons;
742	(*extra_count)++;
743
744	if (unlikely(!extra.type \|\|
745	extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
746	netdev_err(dev: queue->vif->dev,
747	format: "Invalid extra type: %d\n", extra.type);
748	xenvif_fatal_tx_err(vif: queue->vif);
749	return -EINVAL;
750	}
751
752	memcpy(&extras[extra.type - `1`], &extra, sizeof(extra));
753	} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
754
755	return work_to_do;
756	}
757
758	static int xenvif_set_skb_gso(struct xenvif *vif,
759	struct sk_buff *skb,
760	struct xen_netif_extra_info *gso)
761	{
762	if (!gso->u.gso.size) {
763	netdev_err(dev: vif->dev, format: "GSO size must not be zero.\n");
764	xenvif_fatal_tx_err(vif);
765	return -EINVAL;
766	}
767
768	switch (gso->u.gso.type) {
769	case XEN_NETIF_GSO_TYPE_TCPV4:
770	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
771	break;
772	case XEN_NETIF_GSO_TYPE_TCPV6:
773	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
774	break;
775	default:
776	netdev_err(dev: vif->dev, format: "Bad GSO type %d.\n", gso->u.gso.type);
777	xenvif_fatal_tx_err(vif);
778	return -EINVAL;
779	}
780
781	skb_shinfo(skb)->gso_size = gso->u.gso.size;
782	/ gso_segs will be calculated later /
783
784	return `0`;
785	}
786
787	static int checksum_setup(struct xenvif_queue queue, struct* sk_buff *skb)
788	{
789	bool recalculate_partial_csum = false;
790
791	/ A GSO SKB must be CHECKSUM_PARTIAL. However some buggy*
792	* peers can fail to set NETRXF_csum_blank when sending a GSO
793	* frame. In this case force the SKB to CHECKSUM_PARTIAL and
794	* recalculate the partial checksum.
795	*/
796	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
797	queue->stats.rx_gso_checksum_fixup++;
798	skb->ip_summed = CHECKSUM_PARTIAL;
799	recalculate_partial_csum = true;
800	}
801
802	/ A non-CHECKSUM_PARTIAL SKB does not require setup. /
803	if (skb->ip_summed != CHECKSUM_PARTIAL)
804	return `0`;
805
806	return skb_checksum_setup(skb, recalculate: recalculate_partial_csum);
807	}
808
809	static bool tx_credit_exceeded(struct xenvif_queue queue, unsigned* size)
810	{
811	u64 now = get_jiffies_64();
812	u64 next_credit = queue->credit_window_start +
813	msecs_to_jiffies(m: queue->credit_usec / `1000`);
814
815	/ Timer could already be pending in rare cases. /
816	if (timer_pending(timer: &queue->credit_timeout)) {
817	queue->rate_limited = true;
818	return true;
819	}
820
821	/ Passed the point where we can replenish credit? /
822	if (time_after_eq64(now, next_credit)) {
823	queue->credit_window_start = now;
824	tx_add_credit(queue);
825	}
826
827	/ Still too big to send right now? Set a callback. /
828	if (size > queue->remaining_credit) {
829	mod_timer(timer: &queue->credit_timeout,
830	expires: next_credit);
831	queue->credit_window_start = next_credit;
832	queue->rate_limited = true;
833
834	return true;
835	}
836
837	return false;
838	}
839
840	/ No locking is required in xenvif_mcast_add/del() as they are*
841	* only ever invoked from NAPI poll. An RCU list is used because
842	* xenvif_mcast_match() is called asynchronously, during start_xmit.
843	*/
844
845	static int xenvif_mcast_add(struct xenvif vif, const* u8 *addr)
846	{
847	struct xenvif_mcast_addr *mcast;
848
849	if (vif->fe_mcast_count == XEN_NETBK_MCAST_MAX) {
850	if (net_ratelimit())
851	netdev_err(dev: vif->dev,
852	format: "Too many multicast addresses\n");
853	return -ENOSPC;
854	}
855
856	mcast = kzalloc(size: sizeof(*mcast), GFP_ATOMIC);
857	if (!mcast)
858	return -ENOMEM;
859
860	ether_addr_copy(dst: mcast->addr, src: addr);
861	list_add_tail_rcu(new: &mcast->entry, head: &vif->fe_mcast_addr);
862	vif->fe_mcast_count++;
863
864	return `0`;
865	}
866
867	static void xenvif_mcast_del(struct xenvif vif, const* u8 *addr)
868	{
869	struct xenvif_mcast_addr *mcast;
870
871	list_for_each_entry_rcu(mcast, &vif->fe_mcast_addr, entry) {
872	if (ether_addr_equal(addr1: addr, addr2: mcast->addr)) {
873	--vif->fe_mcast_count;
874	list_del_rcu(entry: &mcast->entry);
875	kfree_rcu(mcast, rcu);
876	break;
877	}
878	}
879	}
880
881	bool xenvif_mcast_match(struct xenvif vif, const* u8 *addr)
882	{
883	struct xenvif_mcast_addr *mcast;
884
885	rcu_read_lock();
886	list_for_each_entry_rcu(mcast, &vif->fe_mcast_addr, entry) {
887	if (ether_addr_equal(addr1: addr, addr2: mcast->addr)) {
888	rcu_read_unlock();
889	return true;
890	}
891	}
892	rcu_read_unlock();
893
894	return false;
895	}
896
897	void xenvif_mcast_addr_list_free(struct xenvif *vif)
898	{
899	/ No need for locking or RCU here. NAPI poll and TX queue*
900	* are stopped.
901	*/
902	while (!list_empty(head: &vif->fe_mcast_addr)) {
903	struct xenvif_mcast_addr *mcast;
904
905	mcast = list_first_entry(&vif->fe_mcast_addr,
906	struct xenvif_mcast_addr,
907	entry);
908	--vif->fe_mcast_count;
909	list_del(entry: &mcast->entry);
910	kfree(objp: mcast);
911	}
912	}
913
914	static void xenvif_tx_build_gops(struct xenvif_queue *queue,
915	int budget,
916	unsigned *copy_ops,
917	unsigned *map_ops)
918	{
919	struct sk_buff skb, nskb;
920	int ret;
921	unsigned int frag_overflow;
922
923	while (skb_queue_len(list_: &queue->tx_queue) < budget) {
924	struct xen_netif_tx_request txreq;
925	struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
926	struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-`1`];
927	unsigned int extra_count;
928	RING_IDX idx;
929	int work_to_do;
930	unsigned int data_len;
931
932	if (queue->tx.sring->req_prod - queue->tx.req_cons >
933	XEN_NETIF_TX_RING_SIZE) {
934	netdev_err(dev: queue->vif->dev,
935	format: "Impossible number of requests. "
936	"req_prod %d, req_cons %d, size %ld\n",
937	queue->tx.sring->req_prod, queue->tx.req_cons,
938	XEN_NETIF_TX_RING_SIZE);
939	xenvif_fatal_tx_err(vif: queue->vif);
940	break;
941	}
942
943	work_to_do = XEN_RING_NR_UNCONSUMED_REQUESTS(&queue->tx);
944	if (!work_to_do)
945	break;
946
947	idx = queue->tx.req_cons;
948	rmb(); / Ensure that we see the request before we copy it. /
949	RING_COPY_REQUEST(&queue->tx, idx, &txreq);
950
951	/ Credit-based scheduling. /
952	if (txreq.size > queue->remaining_credit &&
953	tx_credit_exceeded(queue, size: txreq.size))
954	break;
955
956	queue->remaining_credit -= txreq.size;
957
958	work_to_do--;
959	queue->tx.req_cons = ++idx;
960
961	memset(extras, `0`, sizeof(extras));
962	extra_count = `0`;
963	if (txreq.flags & XEN_NETTXF_extra_info) {
964	work_to_do = xenvif_get_extras(queue, extras,
965	extra_count: &extra_count,
966	work_to_do);
967	idx = queue->tx.req_cons;
968	if (unlikely(work_to_do < `0`))
969	break;
970	}
971
972	if (extras[XEN_NETIF_EXTRA_TYPE_MCAST_ADD - `1`].type) {
973	struct xen_netif_extra_info *extra;
974
975	extra = &extras[XEN_NETIF_EXTRA_TYPE_MCAST_ADD - `1`];
976	ret = xenvif_mcast_add(vif: queue->vif, addr: extra->u.mcast.addr);
977
978	make_tx_response(queue, txp: &txreq, extra_count,
979	status: (ret == `0`) ?
980	XEN_NETIF_RSP_OKAY :
981	XEN_NETIF_RSP_ERROR);
982	continue;
983	}
984
985	if (extras[XEN_NETIF_EXTRA_TYPE_MCAST_DEL - `1`].type) {
986	struct xen_netif_extra_info *extra;
987
988	extra = &extras[XEN_NETIF_EXTRA_TYPE_MCAST_DEL - `1`];
989	xenvif_mcast_del(vif: queue->vif, addr: extra->u.mcast.addr);
990
991	make_tx_response(queue, txp: &txreq, extra_count,
992	XEN_NETIF_RSP_OKAY);
993	continue;
994	}
995
996	data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN) ?
997	XEN_NETBACK_TX_COPY_LEN : txreq.size;
998
999	ret = xenvif_count_requests(queue, first: &txreq, extra_count,
1000	txp: txfrags, work_to_do);
1001
1002	if (unlikely(ret < `0`))
1003	break;
1004
1005	idx += ret;
1006
1007	if (unlikely(txreq.size < ETH_HLEN)) {
1008	netdev_dbg(queue->vif->dev,
1009	"Bad packet size: %d\n", txreq.size);
1010	xenvif_tx_err(queue, txp: &txreq, extra_count, end: idx);
1011	break;
1012	}
1013
1014	/ No crossing a page as the payload mustn't fragment. /
1015	if (unlikely((txreq.offset + txreq.size) > XEN_PAGE_SIZE)) {
1016	netdev_err(dev: queue->vif->dev, format: "Cross page boundary, txreq.offset: %u, size: %u\n",
1017	txreq.offset, txreq.size);
1018	xenvif_fatal_tx_err(vif: queue->vif);
1019	break;
1020	}
1021
1022	if (ret >= XEN_NETBK_LEGACY_SLOTS_MAX - `1` && data_len < txreq.size)
1023	data_len = txreq.size;
1024
1025	skb = xenvif_alloc_skb(size: data_len);
1026	if (unlikely(skb == NULL)) {
1027	netdev_dbg(queue->vif->dev,
1028	"Can't allocate a skb in start_xmit.\n");
1029	xenvif_tx_err(queue, txp: &txreq, extra_count, end: idx);
1030	break;
1031	}
1032
1033	skb_shinfo(skb)->nr_frags = ret;
1034	/ At this point shinfo->nr_frags is in fact the number of*
1035	* slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
1036	*/
1037	frag_overflow = `0`;
1038	nskb = NULL;
1039	if (skb_shinfo(skb)->nr_frags > MAX_SKB_FRAGS) {
1040	frag_overflow = skb_shinfo(skb)->nr_frags - MAX_SKB_FRAGS;
1041	BUG_ON(frag_overflow > MAX_SKB_FRAGS);
1042	skb_shinfo(skb)->nr_frags = MAX_SKB_FRAGS;
1043	nskb = xenvif_alloc_skb(size: `0`);
1044	if (unlikely(nskb == NULL)) {
1045	skb_shinfo(skb)->nr_frags = `0`;
1046	kfree_skb(skb);
1047	xenvif_tx_err(queue, txp: &txreq, extra_count, end: idx);
1048	if (net_ratelimit())
1049	netdev_err(dev: queue->vif->dev,
1050	format: "Can't allocate the frag_list skb.\n");
1051	break;
1052	}
1053	}
1054
1055	if (extras[XEN_NETIF_EXTRA_TYPE_GSO - `1`].type) {
1056	struct xen_netif_extra_info *gso;
1057	gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - `1`];
1058
1059	if (xenvif_set_skb_gso(vif: queue->vif, skb, gso)) {
1060	/ Failure in xenvif_set_skb_gso is fatal. /
1061	skb_shinfo(skb)->nr_frags = `0`;
1062	kfree_skb(skb);
1063	kfree_skb(skb: nskb);
1064	break;
1065	}
1066	}
1067
1068	if (extras[XEN_NETIF_EXTRA_TYPE_HASH - `1`].type) {
1069	struct xen_netif_extra_info *extra;
1070	enum pkt_hash_types type = PKT_HASH_TYPE_NONE;
1071
1072	extra = &extras[XEN_NETIF_EXTRA_TYPE_HASH - `1`];
1073
1074	switch (extra->u.hash.type) {
1075	case _XEN_NETIF_CTRL_HASH_TYPE_IPV4:
1076	case _XEN_NETIF_CTRL_HASH_TYPE_IPV6:
1077	type = PKT_HASH_TYPE_L3;
1078	break;
1079
1080	case _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP:
1081	case _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP:
1082	type = PKT_HASH_TYPE_L4;
1083	break;
1084
1085	default:
1086	break;
1087	}
1088
1089	if (type != PKT_HASH_TYPE_NONE)
1090	skb_set_hash(skb,
1091	hash: (u32 )extra->u.hash.value,
1092	type);
1093	}
1094
1095	xenvif_get_requests(queue, skb, first: &txreq, txfrags, copy_ops,
1096	map_ops, frag_overflow, nskb, extra_count,
1097	data_len);
1098
1099	__skb_queue_tail(list: &queue->tx_queue, newsk: skb);
1100
1101	queue->tx.req_cons = idx;
1102	}
1103
1104	return;
1105	}
1106
1107	/ Consolidate skb with a frag_list into a brand new one with local pages on*
1108	* frags. Returns 0 or -ENOMEM if can't allocate new pages.
1109	*/
1110	static int xenvif_handle_frag_list(struct xenvif_queue queue, struct* sk_buff *skb)
1111	{
1112	unsigned int offset = skb_headlen(skb);
1113	skb_frag_t frags[MAX_SKB_FRAGS];
1114	int i, f;
1115	struct ubuf_info *uarg;
1116	struct sk_buff *nskb = skb_shinfo(skb)->frag_list;
1117
1118	queue->stats.tx_zerocopy_sent += `2`;
1119	queue->stats.tx_frag_overflow++;
1120
1121	xenvif_fill_frags(queue, skb: nskb);
1122	/ Subtract frags size, we will correct it later /
1123	skb->truesize -= skb->data_len;
1124	skb->len += nskb->len;
1125	skb->data_len += nskb->len;
1126
1127	/ create a brand new frags array and coalesce there /
1128	for (i = `0`; offset < skb->len; i++) {
1129	struct page *page;
1130	unsigned int len;
1131
1132	BUG_ON(i >= MAX_SKB_FRAGS);
1133	page = alloc_page(GFP_ATOMIC);
1134	if (!page) {
1135	int j;
1136	skb->truesize += skb->data_len;
1137	for (j = `0`; j < i; j++)
1138	put_page(page: skb_frag_page(frag: &frags[j]));
1139	return -ENOMEM;
1140	}
1141
1142	if (offset + PAGE_SIZE < skb->len)
1143	len = PAGE_SIZE;
1144	else
1145	len = skb->len - offset;
1146	if (skb_copy_bits(skb, offset, page_address(page), len))
1147	BUG();
1148
1149	offset += len;
1150	skb_frag_fill_page_desc(frag: &frags[i], page, off: `0`, size: len);
1151	}
1152
1153	/ Release all the original (foreign) frags. /
1154	for (f = `0`; f < skb_shinfo(skb)->nr_frags; f++)
1155	skb_frag_unref(skb, f);
1156	uarg = skb_shinfo(skb)->destructor_arg;
1157	/ increase inflight counter to offset decrement in callback /
1158	atomic_inc(v: &queue->inflight_packets);
1159	uarg->callback(NULL, uarg, true);
1160	skb_shinfo(skb)->destructor_arg = NULL;
1161
1162	/ Fill the skb with the new (local) frags. /
1163	memcpy(skb_shinfo(skb)->frags, frags, i * sizeof(skb_frag_t));
1164	skb_shinfo(skb)->nr_frags = i;
1165	skb->truesize += i * PAGE_SIZE;
1166
1167	return `0`;
1168	}
1169
1170	static int xenvif_tx_submit(struct xenvif_queue *queue)
1171	{
1172	struct gnttab_map_grant_ref *gop_map = queue->tx_map_ops;
1173	struct gnttab_copy *gop_copy = queue->tx_copy_ops;
1174	struct sk_buff *skb;
1175	int work_done = `0`;
1176
1177	while ((skb = __skb_dequeue(list: &queue->tx_queue)) != NULL) {
1178	struct xen_netif_tx_request *txp;
1179	u16 pending_idx;
1180
1181	pending_idx = copy_pending_idx(skb, `0`);
1182	txp = &queue->pending_tx_info[pending_idx].req;
1183
1184	/ Check the remap error code. /
1185	if (unlikely(xenvif_tx_check_gop(queue, skb, &gop_map, &gop_copy))) {
1186	/ If there was an error, xenvif_tx_check_gop is*
1187	* expected to release all the frags which were mapped,
1188	* so kfree_skb shouldn't do it again
1189	*/
1190	skb_shinfo(skb)->nr_frags = `0`;
1191	if (skb_has_frag_list(skb)) {
1192	struct sk_buff *nskb =
1193	skb_shinfo(skb)->frag_list;
1194	skb_shinfo(nskb)->nr_frags = `0`;
1195	}
1196	kfree_skb(skb);
1197	continue;
1198	}
1199
1200	if (txp->flags & XEN_NETTXF_csum_blank)
1201	skb->ip_summed = CHECKSUM_PARTIAL;
1202	else if (txp->flags & XEN_NETTXF_data_validated)
1203	skb->ip_summed = CHECKSUM_UNNECESSARY;
1204
1205	xenvif_fill_frags(queue, skb);
1206
1207	if (unlikely(skb_has_frag_list(skb))) {
1208	struct sk_buff *nskb = skb_shinfo(skb)->frag_list;
1209	xenvif_skb_zerocopy_prepare(queue, skb: nskb);
1210	if (xenvif_handle_frag_list(queue, skb)) {
1211	if (net_ratelimit())
1212	netdev_err(dev: queue->vif->dev,
1213	format: "Not enough memory to consolidate frag_list!\n");
1214	xenvif_skb_zerocopy_prepare(queue, skb);
1215	kfree_skb(skb);
1216	continue;
1217	}
1218	/ Copied all the bits from the frag list -- free it. /
1219	skb_frag_list_init(skb);
1220	kfree_skb(skb: nskb);
1221	}
1222
1223	skb->dev = queue->vif->dev;
1224	skb->protocol = eth_type_trans(skb, dev: skb->dev);
1225	skb_reset_network_header(skb);
1226
1227	if (checksum_setup(queue, skb)) {
1228	netdev_dbg(queue->vif->dev,
1229	"Can't setup checksum in net_tx_action\n");
1230	/ We have to set this flag to trigger the callback /
1231	if (skb_shinfo(skb)->destructor_arg)
1232	xenvif_skb_zerocopy_prepare(queue, skb);
1233	kfree_skb(skb);
1234	continue;
1235	}
1236
1237	skb_probe_transport_header(skb);
1238
1239	/ If the packet is GSO then we will have just set up the*
1240	* transport header offset in checksum_setup so it's now
1241	* straightforward to calculate gso_segs.
1242	*/
1243	if (skb_is_gso(skb)) {
1244	int mss, hdrlen;
1245
1246	/ GSO implies having the L4 header. /
1247	WARN_ON_ONCE(!skb_transport_header_was_set(skb));
1248	if (unlikely(!skb_transport_header_was_set(skb))) {
1249	kfree_skb(skb);
1250	continue;
1251	}
1252
1253	mss = skb_shinfo(skb)->gso_size;
1254	hdrlen = skb_tcp_all_headers(skb);
1255
1256	skb_shinfo(skb)->gso_segs =
1257	DIV_ROUND_UP(skb->len - hdrlen, mss);
1258	}
1259
1260	queue->stats.rx_bytes += skb->len;
1261	queue->stats.rx_packets++;
1262
1263	work_done++;
1264
1265	/ Set this flag right before netif_receive_skb, otherwise*
1266	* someone might think this packet already left netback, and
1267	* do a skb_copy_ubufs while we are still in control of the
1268	* skb. E.g. the __pskb_pull_tail earlier can do such thing.
1269	*/
1270	if (skb_shinfo(skb)->destructor_arg) {
1271	xenvif_skb_zerocopy_prepare(queue, skb);
1272	queue->stats.tx_zerocopy_sent++;
1273	}
1274
1275	netif_receive_skb(skb);
1276	}
1277
1278	return work_done;
1279	}
1280
1281	void xenvif_zerocopy_callback(struct sk_buff skb, struct* ubuf_info *ubuf_base,
1282	bool zerocopy_success)
1283	{
1284	unsigned long flags;
1285	pending_ring_idx_t index;
1286	struct ubuf_info_msgzc *ubuf = uarg_to_msgzc(ubuf_base);
1287	struct xenvif_queue *queue = ubuf_to_queue(ubuf);
1288
1289	/ This is the only place where we grab this lock, to protect callbacks*
1290	* from each other.
1291	*/
1292	spin_lock_irqsave(&queue->callback_lock, flags);
1293	do {
1294	u16 pending_idx = ubuf->desc;
1295	ubuf = (struct ubuf_info_msgzc *) ubuf->ctx;
1296	BUG_ON(queue->dealloc_prod - queue->dealloc_cons >=
1297	MAX_PENDING_REQS);
1298	index = pending_index(i: queue->dealloc_prod);
1299	queue->dealloc_ring[index] = pending_idx;
1300	/ Sync with xenvif_tx_dealloc_action:*
1301	* insert idx then incr producer.
1302	*/
1303	smp_wmb();
1304	queue->dealloc_prod++;
1305	} while (ubuf);
1306	spin_unlock_irqrestore(lock: &queue->callback_lock, flags);
1307
1308	if (likely(zerocopy_success))
1309	queue->stats.tx_zerocopy_success++;
1310	else
1311	queue->stats.tx_zerocopy_fail++;
1312	xenvif_skb_zerocopy_complete(queue);
1313	}
1314
1315	static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue)
1316	{
1317	struct gnttab_unmap_grant_ref *gop;
1318	pending_ring_idx_t dc, dp;
1319	u16 pending_idx, pending_idx_release[MAX_PENDING_REQS];
1320	unsigned int i = `0`;
1321
1322	dc = queue->dealloc_cons;
1323	gop = queue->tx_unmap_ops;
1324
1325	/ Free up any grants we have finished using /
1326	do {
1327	dp = queue->dealloc_prod;
1328
1329	/ Ensure we see all indices enqueued by all*
1330	* xenvif_zerocopy_callback().
1331	*/
1332	smp_rmb();
1333
1334	while (dc != dp) {
1335	BUG_ON(gop - queue->tx_unmap_ops >= MAX_PENDING_REQS);
1336	pending_idx =
1337	queue->dealloc_ring[pending_index(i: dc++)];
1338
1339	pending_idx_release[gop - queue->tx_unmap_ops] =
1340	pending_idx;
1341	queue->pages_to_unmap[gop - queue->tx_unmap_ops] =
1342	queue->mmap_pages[pending_idx];
1343	gnttab_set_unmap_op(unmap: gop,
1344	addr: idx_to_kaddr(queue, idx: pending_idx),
1345	GNTMAP_host_map,
1346	handle: queue->grant_tx_handle[pending_idx]);
1347	xenvif_grant_handle_reset(queue, pending_idx);
1348	++gop;
1349	}
1350
1351	} while (dp != queue->dealloc_prod);
1352
1353	queue->dealloc_cons = dc;
1354
1355	if (gop - queue->tx_unmap_ops > `0`) {
1356	int ret;
1357	ret = gnttab_unmap_refs(unmap_ops: queue->tx_unmap_ops,
1358	NULL,
1359	pages: queue->pages_to_unmap,
1360	count: gop - queue->tx_unmap_ops);
1361	if (ret) {
1362	netdev_err(dev: queue->vif->dev, format: "Unmap fail: nr_ops %tu ret %d\n",
1363	gop - queue->tx_unmap_ops, ret);
1364	for (i = `0`; i < gop - queue->tx_unmap_ops; ++i) {
1365	if (gop[i].status != GNTST_okay)
1366	netdev_err(dev: queue->vif->dev,
1367	format: " host_addr: 0x%llx handle: 0x%x status: %d\n",
1368	gop[i].host_addr,
1369	gop[i].handle,
1370	gop[i].status);
1371	}
1372	BUG();
1373	}
1374	}
1375
1376	for (i = `0`; i < gop - queue->tx_unmap_ops; ++i)
1377	xenvif_idx_release(queue, pending_idx: pending_idx_release[i],
1378	XEN_NETIF_RSP_OKAY);
1379	}
1380
1381
1382	/ Called after netfront has transmitted /
1383	int xenvif_tx_action(struct xenvif_queue queue, int* budget)
1384	{
1385	unsigned nr_mops = `0`, nr_cops = `0`;
1386	int work_done, ret;
1387
1388	if (unlikely(!tx_work_todo(queue)))
1389	return `0`;
1390
1391	xenvif_tx_build_gops(queue, budget, copy_ops: &nr_cops, map_ops: &nr_mops);
1392
1393	if (nr_cops == `0`)
1394	return `0`;
1395
1396	gnttab_batch_copy(batch: queue->tx_copy_ops, count: nr_cops);
1397	if (nr_mops != `0`) {
1398	ret = gnttab_map_refs(map_ops: queue->tx_map_ops,
1399	NULL,
1400	pages: queue->pages_to_map,
1401	count: nr_mops);
1402	if (ret) {
1403	unsigned int i;
1404
1405	netdev_err(dev: queue->vif->dev, format: "Map fail: nr %u ret %d\n",
1406	nr_mops, ret);
1407	for (i = `0`; i < nr_mops; ++i)
1408	WARN_ON_ONCE(queue->tx_map_ops[i].status ==
1409	GNTST_okay);
1410	}
1411	}
1412
1413	work_done = xenvif_tx_submit(queue);
1414
1415	return work_done;
1416	}
1417
1418	static void _make_tx_response(struct xenvif_queue *queue,
1419	const struct xen_netif_tx_request *txp,
1420	unsigned int extra_count,
1421	s8 status)
1422	{
1423	RING_IDX i = queue->tx.rsp_prod_pvt;
1424	struct xen_netif_tx_response *resp;
1425
1426	resp = RING_GET_RESPONSE(&queue->tx, i);
1427	resp->id = txp->id;
1428	resp->status = status;
1429
1430	while (extra_count-- != `0`)
1431	RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL;
1432
1433	queue->tx.rsp_prod_pvt = ++i;
1434	}
1435
1436	static void push_tx_responses(struct xenvif_queue *queue)
1437	{
1438	int notify;
1439
1440	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify);
1441	if (notify)
1442	notify_remote_via_irq(irq: queue->tx_irq);
1443	}
1444
1445	static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
1446	s8 status)
1447	{
1448	struct pending_tx_info *pending_tx_info;
1449	pending_ring_idx_t index;
1450	unsigned long flags;
1451
1452	pending_tx_info = &queue->pending_tx_info[pending_idx];
1453
1454	spin_lock_irqsave(&queue->response_lock, flags);
1455
1456	_make_tx_response(queue, txp: &pending_tx_info->req,
1457	extra_count: pending_tx_info->extra_count, status);
1458
1459	/ Release the pending index before pusing the Tx response so*
1460	* its available before a new Tx request is pushed by the
1461	* frontend.
1462	*/
1463	index = pending_index(i: queue->pending_prod++);
1464	queue->pending_ring[index] = pending_idx;
1465
1466	push_tx_responses(queue);
1467
1468	spin_unlock_irqrestore(lock: &queue->response_lock, flags);
1469	}
1470
1471	static void make_tx_response(struct xenvif_queue *queue,
1472	const struct xen_netif_tx_request *txp,
1473	unsigned int extra_count,
1474	s8 status)
1475	{
1476	unsigned long flags;
1477
1478	spin_lock_irqsave(&queue->response_lock, flags);
1479
1480	_make_tx_response(queue, txp, extra_count, status);
1481	push_tx_responses(queue);
1482
1483	spin_unlock_irqrestore(lock: &queue->response_lock, flags);
1484	}
1485
1486	static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx)
1487	{
1488	int ret;
1489	struct gnttab_unmap_grant_ref tx_unmap_op;
1490
1491	gnttab_set_unmap_op(unmap: &tx_unmap_op,
1492	addr: idx_to_kaddr(queue, idx: pending_idx),
1493	GNTMAP_host_map,
1494	handle: queue->grant_tx_handle[pending_idx]);
1495	xenvif_grant_handle_reset(queue, pending_idx);
1496
1497	ret = gnttab_unmap_refs(unmap_ops: &tx_unmap_op, NULL,
1498	pages: &queue->mmap_pages[pending_idx], count: `1`);
1499	if (ret) {
1500	netdev_err(dev: queue->vif->dev,
1501	format: "Unmap fail: ret: %d pending_idx: %d host_addr: %llx handle: 0x%x status: %d\n",
1502	ret,
1503	pending_idx,
1504	tx_unmap_op.host_addr,
1505	tx_unmap_op.handle,
1506	tx_unmap_op.status);
1507	BUG();
1508	}
1509	}
1510
1511	static inline int tx_work_todo(struct xenvif_queue *queue)
1512	{
1513	if (likely(RING_HAS_UNCONSUMED_REQUESTS(&queue->tx)))
1514	return `1`;
1515
1516	return `0`;
1517	}
1518
1519	static inline bool tx_dealloc_work_todo(struct xenvif_queue *queue)
1520	{
1521	return queue->dealloc_cons != queue->dealloc_prod;
1522	}
1523
1524	void xenvif_unmap_frontend_data_rings(struct xenvif_queue *queue)
1525	{
1526	if (queue->tx.sring)
1527	xenbus_unmap_ring_vfree(dev: xenvif_to_xenbus_device(vif: queue->vif),
1528	vaddr: queue->tx.sring);
1529	if (queue->rx.sring)
1530	xenbus_unmap_ring_vfree(dev: xenvif_to_xenbus_device(vif: queue->vif),
1531	vaddr: queue->rx.sring);
1532	}
1533
1534	int xenvif_map_frontend_data_rings(struct xenvif_queue *queue,
1535	grant_ref_t tx_ring_ref,
1536	grant_ref_t rx_ring_ref)
1537	{
1538	void *addr;
1539	struct xen_netif_tx_sring *txs;
1540	struct xen_netif_rx_sring *rxs;
1541	RING_IDX rsp_prod, req_prod;
1542	int err;
1543
1544	err = xenbus_map_ring_valloc(dev: xenvif_to_xenbus_device(vif: queue->vif),
1545	gnt_refs: &tx_ring_ref, nr_grefs: `1`, vaddr: &addr);
1546	if (err)
1547	goto err;
1548
1549	txs = (struct xen_netif_tx_sring *)addr;
1550	rsp_prod = READ_ONCE(txs->rsp_prod);
1551	req_prod = READ_ONCE(txs->req_prod);
1552
1553	BACK_RING_ATTACH(&queue->tx, txs, rsp_prod, XEN_PAGE_SIZE);
1554
1555	err = -EIO;
1556	if (req_prod - rsp_prod > RING_SIZE(&queue->tx))
1557	goto err;
1558
1559	err = xenbus_map_ring_valloc(dev: xenvif_to_xenbus_device(vif: queue->vif),
1560	gnt_refs: &rx_ring_ref, nr_grefs: `1`, vaddr: &addr);
1561	if (err)
1562	goto err;
1563
1564	rxs = (struct xen_netif_rx_sring *)addr;
1565	rsp_prod = READ_ONCE(rxs->rsp_prod);
1566	req_prod = READ_ONCE(rxs->req_prod);
1567
1568	BACK_RING_ATTACH(&queue->rx, rxs, rsp_prod, XEN_PAGE_SIZE);
1569
1570	err = -EIO;
1571	if (req_prod - rsp_prod > RING_SIZE(&queue->rx))
1572	goto err;
1573
1574	return `0`;
1575
1576	err:
1577	xenvif_unmap_frontend_data_rings(queue);
1578	return err;
1579	}
1580
1581	static bool xenvif_dealloc_kthread_should_stop(struct xenvif_queue *queue)
1582	{
1583	/ Dealloc thread must remain running until all inflight*
1584	* packets complete.
1585	*/
1586	return kthread_should_stop() &&
1587	!atomic_read(v: &queue->inflight_packets);
1588	}
1589
1590	int xenvif_dealloc_kthread(void *data)
1591	{
1592	struct xenvif_queue *queue = data;
1593
1594	for (;;) {
1595	wait_event_interruptible(queue->dealloc_wq,
1596	tx_dealloc_work_todo(queue) \|\|
1597	xenvif_dealloc_kthread_should_stop(queue));
1598	if (xenvif_dealloc_kthread_should_stop(queue))
1599	break;
1600
1601	xenvif_tx_dealloc_action(queue);
1602	cond_resched();
1603	}
1604
1605	/ Unmap anything remaining/
1606	if (tx_dealloc_work_todo(queue))
1607	xenvif_tx_dealloc_action(queue);
1608
1609	return `0`;
1610	}
1611
1612	static void make_ctrl_response(struct xenvif *vif,
1613	const struct xen_netif_ctrl_request *req,
1614	u32 status, u32 data)
1615	{
1616	RING_IDX idx = vif->ctrl.rsp_prod_pvt;
1617	struct xen_netif_ctrl_response rsp = {
1618	.id = req->id,
1619	.type = req->type,
1620	.status = status,
1621	.data = data,
1622	};
1623
1624	*RING_GET_RESPONSE(&vif->ctrl, idx) = rsp;
1625	vif->ctrl.rsp_prod_pvt = ++idx;
1626	}
1627
1628	static void push_ctrl_response(struct xenvif *vif)
1629	{
1630	int notify;
1631
1632	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->ctrl, notify);
1633	if (notify)
1634	notify_remote_via_irq(irq: vif->ctrl_irq);
1635	}
1636
1637	static void process_ctrl_request(struct xenvif *vif,
1638	const struct xen_netif_ctrl_request *req)
1639	{
1640	u32 status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED;
1641	u32 data = `0`;
1642
1643	switch (req->type) {
1644	case XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM:
1645	status = xenvif_set_hash_alg(vif, alg: req->data[`0`]);
1646	break;
1647
1648	case XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS:
1649	status = xenvif_get_hash_flags(vif, flags: &data);
1650	break;
1651
1652	case XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS:
1653	status = xenvif_set_hash_flags(vif, flags: req->data[`0`]);
1654	break;
1655
1656	case XEN_NETIF_CTRL_TYPE_SET_HASH_KEY:
1657	status = xenvif_set_hash_key(vif, gref: req->data[`0`],
1658	len: req->data[`1`]);
1659	break;
1660
1661	case XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE:
1662	status = XEN_NETIF_CTRL_STATUS_SUCCESS;
1663	data = XEN_NETBK_MAX_HASH_MAPPING_SIZE;
1664	break;
1665
1666	case XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE:
1667	status = xenvif_set_hash_mapping_size(vif,
1668	size: req->data[`0`]);
1669	break;
1670
1671	case XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING:
1672	status = xenvif_set_hash_mapping(vif, gref: req->data[`0`],
1673	len: req->data[`1`],
1674	off: req->data[`2`]);
1675	break;
1676
1677	default:
1678	break;
1679	}
1680
1681	make_ctrl_response(vif, req, status, data);
1682	push_ctrl_response(vif);
1683	}
1684
1685	static void xenvif_ctrl_action(struct xenvif *vif)
1686	{
1687	for (;;) {
1688	RING_IDX req_prod, req_cons;
1689
1690	req_prod = vif->ctrl.sring->req_prod;
1691	req_cons = vif->ctrl.req_cons;
1692
1693	/ Make sure we can see requests before we process them. /
1694	rmb();
1695
1696	if (req_cons == req_prod)
1697	break;
1698
1699	while (req_cons != req_prod) {
1700	struct xen_netif_ctrl_request req;
1701
1702	RING_COPY_REQUEST(&vif->ctrl, req_cons, &req);
1703	req_cons++;
1704
1705	process_ctrl_request(vif, req: &req);
1706	}
1707
1708	vif->ctrl.req_cons = req_cons;
1709	vif->ctrl.sring->req_event = req_cons + `1`;
1710	}
1711	}
1712
1713	static bool xenvif_ctrl_work_todo(struct xenvif *vif)
1714	{
1715	if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->ctrl)))
1716	return true;
1717
1718	return false;
1719	}
1720
1721	irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data)
1722	{
1723	struct xenvif *vif = data;
1724	unsigned int eoi_flag = XEN_EOI_FLAG_SPURIOUS;
1725
1726	while (xenvif_ctrl_work_todo(vif)) {
1727	xenvif_ctrl_action(vif);
1728	eoi_flag = `0`;
1729	}
1730
1731	xen_irq_lateeoi(irq, eoi_flags: eoi_flag);
1732
1733	return IRQ_HANDLED;
1734	}
1735
1736	static int __init netback_init(void)
1737	{
1738	int rc = `0`;
1739
1740	if (!xen_domain())
1741	return -ENODEV;
1742
1743	/ Allow as many queues as there are CPUs but max. 8 if user has not*
1744	* specified a value.
1745	*/
1746	if (xenvif_max_queues == `0`)
1747	xenvif_max_queues = min_t(unsigned int, MAX_QUEUES_DEFAULT,
1748	num_online_cpus());
1749
1750	if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) {
1751	pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n",
1752	fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX);
1753	fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX;
1754	}
1755
1756	rc = xenvif_xenbus_init();
1757	if (rc)
1758	goto failed_init;
1759
1760	#ifdef CONFIG_DEBUG_FS
1761	xen_netback_dbg_root = debugfs_create_dir(name: "xen-netback", NULL);
1762	#endif /* CONFIG_DEBUG_FS */
1763
1764	return `0`;
1765
1766	failed_init:
1767	return rc;
1768	}
1769
1770	module_init(netback_init);
1771
1772	static void __exit netback_fini(void)
1773	{
1774	#ifdef CONFIG_DEBUG_FS
1775	debugfs_remove_recursive(dentry: xen_netback_dbg_root);
1776	#endif /* CONFIG_DEBUG_FS */
1777	xenvif_xenbus_fini();
1778	}
1779	module_exit(netback_fini);
1780
1781	MODULE_DESCRIPTION("Xen backend network device module");
1782	MODULE_LICENSE("Dual BSD/GPL");
1783	MODULE_ALIAS("xen-backend:vif");
1784

source code of linux/drivers/net/xen-netback/netback.c