1 | /* |
2 | * Copyright (c) 2003-2008 Chelsio, Inc. All rights reserved. |
3 | * |
4 | * This software is available to you under a choice of one of two |
5 | * licenses. You may choose to be licensed under the terms of the GNU |
6 | * General Public License (GPL) Version 2, available from the file |
7 | * COPYING in the main directory of this source tree, or the |
8 | * OpenIB.org BSD license below: |
9 | * |
10 | * Redistribution and use in source and binary forms, with or |
11 | * without modification, are permitted provided that the following |
12 | * conditions are met: |
13 | * |
14 | * - Redistributions of source code must retain the above |
15 | * copyright notice, this list of conditions and the following |
16 | * disclaimer. |
17 | * |
18 | * - Redistributions in binary form must reproduce the above |
19 | * copyright notice, this list of conditions and the following |
20 | * disclaimer in the documentation and/or other materials |
21 | * provided with the distribution. |
22 | * |
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
30 | * SOFTWARE. |
31 | */ |
32 | #include <linux/skbuff.h> |
33 | #include <linux/netdevice.h> |
34 | #include <linux/if.h> |
35 | #include <linux/if_vlan.h> |
36 | #include <linux/jhash.h> |
37 | #include <linux/slab.h> |
38 | #include <linux/export.h> |
39 | #include <net/neighbour.h> |
40 | #include "common.h" |
41 | #include "t3cdev.h" |
42 | #include "cxgb3_defs.h" |
43 | #include "l2t.h" |
44 | #include "t3_cpl.h" |
45 | #include "firmware_exports.h" |
46 | |
47 | #define VLAN_NONE 0xfff |
48 | |
49 | /* |
 * Module locking notes: There is a RW lock protecting the L2 table as a
 * whole plus a spinlock per L2T entry. Entry lookups and allocations happen
 * under the protection of the table lock; individual entry changes happen
 * while holding that entry's spinlock. The table lock nests outside the
 * entry locks. Allocations of new entries take the table lock as writers so
 * no other lookups can happen while allocating new entries. Entry updates
 * take the table lock as readers so multiple entries can be updated in
 * parallel. Dropping an L2T entry only decrements its reference count, so
 * it can happen in parallel with entry allocation, but no entry can change
 * state or increment its ref count during allocation, as both of these
 * perform lookups.
61 | */ |
62 | |
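/*
 * Extract the 802.1p priority from an entry's VLAN TCI: the 3-bit priority
 * lives in bits 15:13, above the 12-bit VLAN ID.
 */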
63 | static inline unsigned int vlan_prio(const struct l2t_entry *e) |
64 | { |
65 | return e->vlan >> 13; |
66 | } |
67 | |
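/*
 * Hash an (address, ifindex) pair into the L2 table. The mask assumes
 * d->nentries is a power of 2.
 */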
68 | static inline unsigned int arp_hash(u32 key, int ifindex, |
69 | const struct l2t_data *d) |
70 | { |
	return jhash_2words(key, ifindex, 0) & (d->nentries - 1);
72 | } |
73 | |
74 | static inline void neigh_replace(struct l2t_entry *e, struct neighbour *n) |
75 | { |
76 | neigh_hold(n); |
77 | if (e->neigh) |
		neigh_release(e->neigh);
79 | e->neigh = n; |
80 | } |
81 | |
82 | /* |
83 | * Set up an L2T entry and send any packets waiting in the arp queue. The |
84 | * supplied skb is used for the CPL_L2T_WRITE_REQ. Must be called with the |
85 | * entry locked. |
86 | */ |
87 | static int setup_l2e_send_pending(struct t3cdev *dev, struct sk_buff *skb, |
88 | struct l2t_entry *e) |
89 | { |
90 | struct cpl_l2t_write_req *req; |
91 | struct sk_buff *tmp; |
92 | |
93 | if (!skb) { |
		skb = alloc_skb(sizeof(*req), GFP_ATOMIC);
95 | if (!skb) |
96 | return -ENOMEM; |
97 | } |
98 | |
	req = __skb_put(skb, sizeof(*req));
100 | req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); |
101 | OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx)); |
102 | req->params = htonl(V_L2T_W_IDX(e->idx) | V_L2T_W_IFF(e->smt_idx) | |
103 | V_L2T_W_VLAN(e->vlan & VLAN_VID_MASK) | |
104 | V_L2T_W_PRIO(vlan_prio(e))); |
105 | memcpy(e->dmac, e->neigh->ha, sizeof(e->dmac)); |
106 | memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac)); |
107 | skb->priority = CPL_PRIORITY_CONTROL; |
108 | cxgb3_ofld_send(dev, skb); |
109 | |
110 | skb_queue_walk_safe(&e->arpq, skb, tmp) { |
		__skb_unlink(skb, &e->arpq);
112 | cxgb3_ofld_send(dev, skb); |
113 | } |
114 | e->state = L2T_STATE_VALID; |
115 | |
116 | return 0; |
117 | } |
118 | |
119 | /* |
 * Add a packet to an L2T entry's queue of packets awaiting resolution.
121 | * Must be called with the entry's lock held. |
122 | */ |
123 | static inline void arpq_enqueue(struct l2t_entry *e, struct sk_buff *skb) |
124 | { |
	__skb_queue_tail(&e->arpq, skb);
126 | } |
127 | |
128 | int t3_l2t_send_slow(struct t3cdev *dev, struct sk_buff *skb, |
129 | struct l2t_entry *e) |
130 | { |
131 | again: |
132 | switch (e->state) { |
133 | case L2T_STATE_STALE: /* entry is stale, kick off revalidation */ |
		neigh_event_send(e->neigh, NULL);
		spin_lock_bh(&e->lock);
		if (e->state == L2T_STATE_STALE)
			e->state = L2T_STATE_VALID;
		spin_unlock_bh(&e->lock);
139 | fallthrough; |
140 | case L2T_STATE_VALID: /* fast-path, send the packet on */ |
141 | return cxgb3_ofld_send(dev, skb); |
142 | case L2T_STATE_RESOLVING: |
		spin_lock_bh(&e->lock);
		if (e->state != L2T_STATE_RESOLVING) {
			/* ARP already completed */
			spin_unlock_bh(&e->lock);
			goto again;
		}
		arpq_enqueue(e, skb);
		spin_unlock_bh(&e->lock);
151 | |
152 | /* |
153 | * Only the first packet added to the arpq should kick off |
154 | * resolution. However, because the alloc_skb below can fail, |
155 | * we allow each packet added to the arpq to retry resolution |
156 | * as a way of recovering from transient memory exhaustion. |
157 | * A better way would be to use a work request to retry L2T |
158 | * entries when there's no memory. |
159 | */ |
		if (!neigh_event_send(e->neigh, NULL)) {
			skb = alloc_skb(sizeof(struct cpl_l2t_write_req),
					GFP_ATOMIC);
			if (!skb)
				break;

			spin_lock_bh(&e->lock);
			if (!skb_queue_empty(&e->arpq))
				setup_l2e_send_pending(dev, skb, e);
			else	/* we lost the race */
				__kfree_skb(skb);
			spin_unlock_bh(&e->lock);
		}
172 | } |
173 | } |
174 | return 0; |
175 | } |
176 | |
177 | EXPORT_SYMBOL(t3_l2t_send_slow); |
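
/*
 * A minimal, hypothetical caller sketch (my_arp_failure/my_xmit are
 * illustrative names, not part of this driver): a user of the L2 table
 * typically installs a failure handler in the skb's control block, via the
 * set_arp_failure_handler() helper from l2t.h, before calling
 * t3_l2t_send_slow(), so that packets parked on the arpq are handed back
 * if resolution fails (see handle_failed_resolution() below):
 *
 *	static void my_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
 *	{
 *		kfree_skb(skb);
 *	}
 *
 *	static int my_xmit(struct t3cdev *dev, struct sk_buff *skb,
 *			   struct l2t_entry *e)
 *	{
 *		set_arp_failure_handler(skb, my_arp_failure);
 *		return t3_l2t_send_slow(dev, skb, e);
 *	}
 */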
178 | |
179 | void t3_l2t_send_event(struct t3cdev *dev, struct l2t_entry *e) |
180 | { |
181 | again: |
182 | switch (e->state) { |
183 | case L2T_STATE_STALE: /* entry is stale, kick off revalidation */ |
		neigh_event_send(e->neigh, NULL);
		spin_lock_bh(&e->lock);
		if (e->state == L2T_STATE_STALE) {
			e->state = L2T_STATE_VALID;
		}
		spin_unlock_bh(&e->lock);
190 | return; |
191 | case L2T_STATE_VALID: /* fast-path, send the packet on */ |
192 | return; |
193 | case L2T_STATE_RESOLVING: |
		spin_lock_bh(&e->lock);
		if (e->state != L2T_STATE_RESOLVING) {
			/* ARP already completed */
			spin_unlock_bh(&e->lock);
			goto again;
		}
		spin_unlock_bh(&e->lock);
201 | |
202 | /* |
203 | * Only the first packet added to the arpq should kick off |
204 | * resolution. However, because the alloc_skb below can fail, |
205 | * we allow each packet added to the arpq to retry resolution |
206 | * as a way of recovering from transient memory exhaustion. |
207 | * A better way would be to use a work request to retry L2T |
208 | * entries when there's no memory. |
209 | */ |
		neigh_event_send(e->neigh, NULL);
211 | } |
212 | } |
213 | |
214 | EXPORT_SYMBOL(t3_l2t_send_event); |
215 | |
216 | /* |
 * Allocate a free L2T entry. Must be called with l2t_data.lock held as a
 * writer.
218 | */ |
219 | static struct l2t_entry *alloc_l2e(struct l2t_data *d) |
220 | { |
221 | struct l2t_entry *end, *e, **p; |
222 | |
	if (!atomic_read(&d->nfree))
224 | return NULL; |
225 | |
226 | /* there's definitely a free entry */ |
227 | for (e = d->rover, end = &d->l2tab[d->nentries]; e != end; ++e) |
		if (atomic_read(&e->refcnt) == 0)
229 | goto found; |
230 | |
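	/* wrapped past the end: rescan from index 1 (entry 0 is reserved) */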
	for (e = &d->l2tab[1]; atomic_read(&e->refcnt); ++e) ;
232 | found: |
233 | d->rover = e + 1; |
	atomic_dec(&d->nfree);
235 | |
236 | /* |
237 | * The entry we found may be an inactive entry that is |
238 | * presently in the hash table. We need to remove it. |
239 | */ |
240 | if (e->state != L2T_STATE_UNUSED) { |
		int hash = arp_hash(e->addr, e->ifindex, d);
242 | |
243 | for (p = &d->l2tab[hash].first; *p; p = &(*p)->next) |
244 | if (*p == e) { |
245 | *p = e->next; |
246 | break; |
247 | } |
248 | e->state = L2T_STATE_UNUSED; |
249 | } |
250 | return e; |
251 | } |
252 | |
253 | /* |
254 | * Called when an L2T entry has no more users. The entry is left in the hash |
255 | * table since it is likely to be reused but we also bump nfree to indicate |
256 | * that the entry can be reallocated for a different neighbor. We also drop |
257 | * the existing neighbor reference in case the neighbor is going away and is |
258 | * waiting on our reference. |
259 | * |
260 | * Because entries can be reallocated to other neighbors once their ref count |
261 | * drops to 0 we need to take the entry's lock to avoid races with a new |
262 | * incarnation. |
263 | */ |
264 | void t3_l2e_free(struct l2t_data *d, struct l2t_entry *e) |
265 | { |
	spin_lock_bh(&e->lock);
	if (atomic_read(&e->refcnt) == 0) {	/* hasn't been recycled */
		if (e->neigh) {
			neigh_release(e->neigh);
			e->neigh = NULL;
		}
	}
	spin_unlock_bh(&e->lock);
	atomic_inc(&d->nfree);
275 | } |
276 | |
277 | EXPORT_SYMBOL(t3_l2e_free); |
278 | |
279 | /* |
280 | * Update an L2T entry that was previously used for the same next hop as neigh. |
281 | * Must be called with softirqs disabled. |
282 | */ |
283 | static inline void reuse_entry(struct l2t_entry *e, struct neighbour *neigh) |
284 | { |
285 | unsigned int nud_state; |
286 | |
	spin_lock(&e->lock);	/* avoid race with t3_l2t_free */

	if (neigh != e->neigh)
		neigh_replace(e, neigh);
	nud_state = neigh->nud_state;
	if (memcmp(e->dmac, neigh->ha, sizeof(e->dmac)) ||
	    !(nud_state & NUD_VALID))
		e->state = L2T_STATE_RESOLVING;
	else if (nud_state & NUD_CONNECTED)
		e->state = L2T_STATE_VALID;
	else
		e->state = L2T_STATE_STALE;
	spin_unlock(&e->lock);
300 | } |
301 | |
302 | struct l2t_entry *t3_l2t_get(struct t3cdev *cdev, struct dst_entry *dst, |
303 | struct net_device *dev, const void *daddr) |
304 | { |
305 | struct l2t_entry *e = NULL; |
306 | struct neighbour *neigh; |
307 | struct port_info *p; |
308 | struct l2t_data *d; |
309 | int hash; |
310 | u32 addr; |
311 | int ifidx; |
312 | int smt_idx; |
313 | |
314 | rcu_read_lock(); |
315 | neigh = dst_neigh_lookup(dst, daddr); |
316 | if (!neigh) |
317 | goto done_rcu; |
318 | |
319 | addr = *(u32 *) neigh->primary_key; |
320 | ifidx = neigh->dev->ifindex; |
321 | |
322 | if (!dev) |
323 | dev = neigh->dev; |
324 | p = netdev_priv(dev); |
325 | smt_idx = p->port_id; |
326 | |
327 | d = L2DATA(cdev); |
328 | if (!d) |
329 | goto done_rcu; |
330 | |
	hash = arp_hash(addr, ifidx, d);
332 | |
333 | write_lock_bh(&d->lock); |
334 | for (e = d->l2tab[hash].first; e; e = e->next) |
335 | if (e->addr == addr && e->ifindex == ifidx && |
336 | e->smt_idx == smt_idx) { |
337 | l2t_hold(d, e); |
			if (atomic_read(&e->refcnt) == 1)
339 | reuse_entry(e, neigh); |
340 | goto done_unlock; |
341 | } |
342 | |
343 | /* Need to allocate a new entry */ |
344 | e = alloc_l2e(d); |
345 | if (e) { |
		spin_lock(&e->lock);	/* avoid race with t3_l2t_free */
347 | e->next = d->l2tab[hash].first; |
348 | d->l2tab[hash].first = e; |
349 | e->state = L2T_STATE_RESOLVING; |
350 | e->addr = addr; |
351 | e->ifindex = ifidx; |
352 | e->smt_idx = smt_idx; |
		atomic_set(&e->refcnt, 1);
		neigh_replace(e, neigh);
		if (is_vlan_dev(neigh->dev))
			e->vlan = vlan_dev_vlan_id(neigh->dev);
		else
			e->vlan = VLAN_NONE;
		spin_unlock(&e->lock);
360 | } |
361 | done_unlock: |
362 | write_unlock_bh(&d->lock); |
363 | done_rcu: |
364 | if (neigh) |
365 | neigh_release(neigh); |
366 | rcu_read_unlock(); |
367 | return e; |
368 | } |
369 | |
370 | EXPORT_SYMBOL(t3_l2t_get); |
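
/*
 * A minimal usage sketch (illustrative only): a caller claims an entry for
 * the next hop of a route, transmits through it, and finally drops its
 * reference, e.g. via the l2t_release() helper from l2t.h, which ends up
 * in t3_l2e_free() once the refcount reaches zero:
 *
 *	struct l2t_entry *e;
 *	int err;
 *
 *	e = t3_l2t_get(cdev, dst, NULL, daddr);
 *	if (!e)
 *		return -ENOMEM;
 *	err = t3_l2t_send_slow(cdev, skb, e);
 *	...
 *	l2t_release(cdev, e);
 */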
371 | |
372 | /* |
373 | * Called when address resolution fails for an L2T entry to handle packets |
374 | * on the arpq head. If a packet specifies a failure handler it is invoked, |
 * otherwise the packet is sent to the offload device.
376 | * |
377 | * XXX: maybe we should abandon the latter behavior and just require a failure |
378 | * handler. |
379 | */ |
380 | static void handle_failed_resolution(struct t3cdev *dev, struct sk_buff_head *arpq) |
381 | { |
382 | struct sk_buff *skb, *tmp; |
383 | |
384 | skb_queue_walk_safe(arpq, skb, tmp) { |
385 | struct l2t_skb_cb *cb = L2T_SKB_CB(skb); |
386 | |
		__skb_unlink(skb, arpq);
388 | if (cb->arp_failure_handler) |
389 | cb->arp_failure_handler(dev, skb); |
390 | else |
391 | cxgb3_ofld_send(dev, skb); |
392 | } |
393 | } |
394 | |
395 | /* |
396 | * Called when the host's ARP layer makes a change to some entry that is |
397 | * loaded into the HW L2 table. |
398 | */ |
399 | void t3_l2t_update(struct t3cdev *dev, struct neighbour *neigh) |
400 | { |
401 | struct sk_buff_head arpq; |
402 | struct l2t_entry *e; |
403 | struct l2t_data *d = L2DATA(dev); |
404 | u32 addr = *(u32 *) neigh->primary_key; |
405 | int ifidx = neigh->dev->ifindex; |
	int hash = arp_hash(addr, ifidx, d);
407 | |
408 | read_lock_bh(&d->lock); |
409 | for (e = d->l2tab[hash].first; e; e = e->next) |
410 | if (e->addr == addr && e->ifindex == ifidx) { |
			spin_lock(&e->lock);
412 | goto found; |
413 | } |
414 | read_unlock_bh(&d->lock); |
415 | return; |
416 | |
417 | found: |
	__skb_queue_head_init(&arpq);

	read_unlock(&d->lock);
	if (atomic_read(&e->refcnt)) {
422 | if (neigh != e->neigh) |
			neigh_replace(e, neigh);
424 | |
425 | if (e->state == L2T_STATE_RESOLVING) { |
426 | if (neigh->nud_state & NUD_FAILED) { |
				skb_queue_splice_init(&e->arpq, &arpq);
428 | } else if (neigh->nud_state & (NUD_CONNECTED|NUD_STALE)) |
429 | setup_l2e_send_pending(dev, NULL, e); |
430 | } else { |
431 | e->state = neigh->nud_state & NUD_CONNECTED ? |
432 | L2T_STATE_VALID : L2T_STATE_STALE; |
			if (!ether_addr_equal(e->dmac, neigh->ha))
434 | setup_l2e_send_pending(dev, NULL, e); |
435 | } |
436 | } |
	spin_unlock_bh(&e->lock);
438 | |
	if (!skb_queue_empty(&arpq))
		handle_failed_resolution(dev, &arpq);
441 | } |
442 | |
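/*
 * Allocate and initialize an L2 table with l2t_capacity entries.
 * l2t_capacity must be a power of 2, since arp_hash() masks with
 * nentries - 1; entry 0 is reserved and never handed out.
 */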
443 | struct l2t_data *t3_init_l2t(unsigned int l2t_capacity) |
444 | { |
445 | struct l2t_data *d; |
446 | int i; |
447 | |
448 | d = kvzalloc(struct_size(d, l2tab, l2t_capacity), GFP_KERNEL); |
449 | if (!d) |
450 | return NULL; |
451 | |
452 | d->nentries = l2t_capacity; |
453 | d->rover = &d->l2tab[1]; /* entry 0 is not used */ |
	atomic_set(&d->nfree, l2t_capacity - 1);
455 | rwlock_init(&d->lock); |
456 | |
457 | for (i = 0; i < l2t_capacity; ++i) { |
458 | d->l2tab[i].idx = i; |
459 | d->l2tab[i].state = L2T_STATE_UNUSED; |
		__skb_queue_head_init(&d->l2tab[i].arpq);
		spin_lock_init(&d->l2tab[i].lock);
		atomic_set(&d->l2tab[i].refcnt, 0);
463 | } |
464 | return d; |
465 | } |
466 | |