// SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause

/* Packet transmit logic for Mellanox Gigabit Ethernet driver
 *
 * Copyright (C) 2020-2021 NVIDIA CORPORATION & AFFILIATES
 */

#include <linux/skbuff.h>

#include "mlxbf_gige.h"
#include "mlxbf_gige_regs.h"

/* Transmit Initialization
 * 1) Allocates TX WQE array using coherent DMA mapping
 * 2) Allocates TX completion counter using coherent DMA mapping
 */
int mlxbf_gige_tx_init(struct mlxbf_gige *priv)
{
	size_t size;

	size = MLXBF_GIGE_TX_WQE_SZ * priv->tx_q_entries;
	priv->tx_wqe_base = dma_alloc_coherent(priv->dev, size,
					       &priv->tx_wqe_base_dma,
					       GFP_KERNEL);
	if (!priv->tx_wqe_base)
		return -ENOMEM;

	priv->tx_wqe_next = priv->tx_wqe_base;

	/* Write TX WQE base address into MMIO reg */
	writeq(priv->tx_wqe_base_dma, priv->base + MLXBF_GIGE_TX_WQ_BASE);

	/* Allocate address for TX completion count */
	priv->tx_cc = dma_alloc_coherent(priv->dev, MLXBF_GIGE_TX_CC_SZ,
					 &priv->tx_cc_dma, GFP_KERNEL);
	if (!priv->tx_cc) {
		dma_free_coherent(priv->dev, size,
				  priv->tx_wqe_base, priv->tx_wqe_base_dma);
		return -ENOMEM;
	}

	/* Write TX CC base address into MMIO reg */
	writeq(priv->tx_cc_dma, priv->base + MLXBF_GIGE_TX_CI_UPDATE_ADDRESS);

	writeq(ilog2(priv->tx_q_entries),
	       priv->base + MLXBF_GIGE_TX_WQ_SIZE_LOG2);

	priv->prev_tx_ci = 0;
	priv->tx_pi = 0;

	return 0;
}

/* Transmit Deinitialization
 * This routine will free allocations done by mlxbf_gige_tx_init(),
 * namely the TX WQE array and the TX completion counter
 */
void mlxbf_gige_tx_deinit(struct mlxbf_gige *priv)
{
	u64 *tx_wqe_addr;
	size_t size;
	int i;

	tx_wqe_addr = priv->tx_wqe_base;

	for (i = 0; i < priv->tx_q_entries; i++) {
		if (priv->tx_skb[i]) {
			dma_unmap_single(priv->dev, *tx_wqe_addr,
					 priv->tx_skb[i]->len, DMA_TO_DEVICE);
			dev_kfree_skb(priv->tx_skb[i]);
			priv->tx_skb[i] = NULL;
		}
		tx_wqe_addr += 2;
	}

	size = MLXBF_GIGE_TX_WQE_SZ * priv->tx_q_entries;
	dma_free_coherent(priv->dev, size,
			  priv->tx_wqe_base, priv->tx_wqe_base_dma);

	dma_free_coherent(priv->dev, MLXBF_GIGE_TX_CC_SZ,
			  priv->tx_cc, priv->tx_cc_dma);

	priv->tx_wqe_base = NULL;
	priv->tx_wqe_base_dma = 0;
	priv->tx_cc = NULL;
	priv->tx_cc_dma = 0;
	priv->tx_wqe_next = NULL;
	writeq(0, priv->base + MLXBF_GIGE_TX_WQ_BASE);
	writeq(0, priv->base + MLXBF_GIGE_TX_CI_UPDATE_ADDRESS);
}

/* Function that returns status of TX ring:
 *          0: TX ring is full, i.e. there are no
 *             available un-used entries in TX ring.
 *   non-zero: TX ring is not full, i.e. there are
 *             some available entries in TX ring.
 *             The non-zero value is a measure of
 *             how many TX entries are available, but
 *             it is not the exact number of available
 *             entries (see below).
 *
 * The algorithm makes the assumption that if
 * (prev_tx_ci == tx_pi) then the TX ring is empty.
 * An empty ring actually has (tx_q_entries-1)
 * usable entries, which allows the algorithm to
 * differentiate the case of an empty ring vs. a full ring.
 */
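/* Worked example (illustrative values): with tx_q_entries == 128,
 * prev_tx_ci == 0 and tx_pi == 3 (three WQEs posted, none completed
 * yet), the computation below yields ((128 + 0 - 3) % 128) - 1 = 124.
 * With 127 entries in use the result is 0 and the ring is reported
 * full, so one slot is always held back to distinguish "full" from
 * "empty".  Assuming tx_q_entries is a power of two (its log2 is
 * programmed into the hardware above), the modulo also behaves
 * correctly when the free-running 16-bit indices wrap past 0xFFFF.
 */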
static u16 mlxbf_gige_tx_buffs_avail(struct mlxbf_gige *priv)
{
	unsigned long flags;
	u16 avail;

	spin_lock_irqsave(&priv->lock, flags);

	if (priv->prev_tx_ci == priv->tx_pi)
		avail = priv->tx_q_entries - 1;
	else
		avail = ((priv->tx_q_entries + priv->prev_tx_ci - priv->tx_pi)
			  % priv->tx_q_entries) - 1;

	spin_unlock_irqrestore(&priv->lock, flags);

	return avail;
}

bool mlxbf_gige_handle_tx_complete(struct mlxbf_gige *priv)
{
	struct net_device_stats *stats;
	u16 tx_wqe_index;
	u64 *tx_wqe_addr;
	u64 tx_status;
	u16 tx_ci;

	tx_status = readq(priv->base + MLXBF_GIGE_TX_STATUS);
	if (tx_status & MLXBF_GIGE_TX_STATUS_DATA_FIFO_FULL)
		priv->stats.tx_fifo_full++;
	tx_ci = readq(priv->base + MLXBF_GIGE_TX_CONSUMER_INDEX);
	stats = &priv->netdev->stats;

	/* Transmit completion logic needs to loop until the completion
	 * index (in SW) equals TX consumer index (from HW). These
	 * parameters are unsigned 16-bit values and the wrap case needs
	 * to be supported, that is TX consumer index wrapped from 0xFFFF
	 * to 0 while TX completion index is still < 0xFFFF.
	 */
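	/* Illustrative wrap case: if prev_tx_ci == 0xFFFE and the HW
	 * consumer index reads back as 0x0002, the loop below runs four
	 * times; prev_tx_ci advances through 0xFFFF, wraps to 0x0000 and
	 * stops at 0x0002, with each pass reducing the free-running index
	 * modulo tx_q_entries to locate the WQE being completed.
	 */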
	for (; priv->prev_tx_ci != tx_ci; priv->prev_tx_ci++) {
		tx_wqe_index = priv->prev_tx_ci % priv->tx_q_entries;
		/* Each TX WQE is 16 bytes, i.e. two 64-bit words: the
		 * first word holds the DMA address of the 2KB TX buffer
		 * and the second word holds information about the TX
		 * WQE, including the packet length.
		 */
		tx_wqe_addr = priv->tx_wqe_base +
			      (tx_wqe_index * MLXBF_GIGE_TX_WQE_SZ_QWORDS);

		stats->tx_packets++;
		stats->tx_bytes += MLXBF_GIGE_TX_WQE_PKT_LEN(tx_wqe_addr);

		dma_unmap_single(priv->dev, *tx_wqe_addr,
				 priv->tx_skb[tx_wqe_index]->len, DMA_TO_DEVICE);
		dev_consume_skb_any(priv->tx_skb[tx_wqe_index]);
		priv->tx_skb[tx_wqe_index] = NULL;

		/* Ensure completion of updates across all cores */
		mb();
	}

	/* Since the TX ring was likely just drained, check if TX queue
	 * had previously been stopped and now that there are TX buffers
	 * available the TX queue can be awakened.
	 */
	if (netif_queue_stopped(priv->netdev) &&
	    mlxbf_gige_tx_buffs_avail(priv))
		netif_wake_queue(priv->netdev);

	return true;
}

/* Function to advance the tx_wqe_next pointer to next TX WQE */
void mlxbf_gige_update_tx_wqe_next(struct mlxbf_gige *priv)
{
	/* Advance tx_wqe_next pointer */
	priv->tx_wqe_next += MLXBF_GIGE_TX_WQE_SZ_QWORDS;

	/* Check if 'next' pointer is beyond end of TX ring;
	 * if so, set 'next' back to 'base' pointer of ring
	 */
	if (priv->tx_wqe_next == (priv->tx_wqe_base +
				  (priv->tx_q_entries * MLXBF_GIGE_TX_WQE_SZ_QWORDS)))
		priv->tx_wqe_next = priv->tx_wqe_base;
}

netdev_tx_t mlxbf_gige_start_xmit(struct sk_buff *skb,
				  struct net_device *netdev)
{
	struct mlxbf_gige *priv = netdev_priv(netdev);
	long buff_addr, start_dma_page, end_dma_page;
	struct sk_buff *tx_skb;
	dma_addr_t tx_buf_dma;
	unsigned long flags;
	u64 *tx_wqe_addr;
	u64 word2;

	/* If needed, linearize TX SKB as hardware DMA expects this */
	if (skb->len > MLXBF_GIGE_DEFAULT_BUF_SZ || skb_linearize(skb)) {
		dev_kfree_skb(skb);
		netdev->stats.tx_dropped++;
		return NETDEV_TX_OK;
	}

	buff_addr = (long)skb->data;
	start_dma_page = buff_addr >> MLXBF_GIGE_DMA_PAGE_SHIFT;
	end_dma_page = (buff_addr + skb->len - 1) >> MLXBF_GIGE_DMA_PAGE_SHIFT;

	/* Verify that the payload pointer and data length of the SKB to be
	 * transmitted do not violate the hardware DMA limitation.
	 */
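	/* Example of the check below: if skb->data begins 100 bytes before
	 * a DMA page boundary (page size defined by
	 * MLXBF_GIGE_DMA_PAGE_SHIFT) and skb->len is 1514, start_dma_page
	 * and end_dma_page differ, so the frame straddles two DMA pages
	 * and must be copied into a newly allocated, suitably aligned SKB
	 * (the bounce copy below) before it can be handed to the hardware.
	 */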
	if (start_dma_page != end_dma_page) {
		/* DMA operation would fail as-is, alloc new aligned SKB */
		tx_skb = mlxbf_gige_alloc_skb(priv, skb->len,
					      &tx_buf_dma, DMA_TO_DEVICE);
		if (!tx_skb) {
			/* Free original skb, could not alloc new aligned SKB */
			dev_kfree_skb(skb);
			netdev->stats.tx_dropped++;
			return NETDEV_TX_OK;
		}

		skb_put_data(tx_skb, skb->data, skb->len);

		/* Free the original SKB */
		dev_kfree_skb(skb);
	} else {
		tx_skb = skb;
		tx_buf_dma = dma_map_single(priv->dev, skb->data,
					    skb->len, DMA_TO_DEVICE);
		if (dma_mapping_error(priv->dev, tx_buf_dma)) {
			dev_kfree_skb(skb);
			netdev->stats.tx_dropped++;
			return NETDEV_TX_OK;
		}
	}

	/* Get address of TX WQE */
	tx_wqe_addr = priv->tx_wqe_next;

	mlxbf_gige_update_tx_wqe_next(priv);

	/* Put PA of buffer address into first 64-bit word of TX WQE */
	*tx_wqe_addr = tx_buf_dma;

	/* Set TX WQE pkt_len appropriately
	 * NOTE: GigE silicon will automatically pad up to
	 * minimum packet length if needed.
	 */
	word2 = tx_skb->len & MLXBF_GIGE_TX_WQE_PKT_LEN_MASK;

	/* Write entire 2nd word of TX WQE */
	*(tx_wqe_addr + 1) = word2;

	spin_lock_irqsave(&priv->lock, flags);
	priv->tx_skb[priv->tx_pi % priv->tx_q_entries] = tx_skb;
	priv->tx_pi++;
	spin_unlock_irqrestore(&priv->lock, flags);

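	/* Ring the TX doorbell (producer index write) only when the stack
	 * reports no further packets queued behind this one; when
	 * netdev_xmit_more() is true the MMIO write is deferred so that a
	 * burst of packets shares a single doorbell update.
	 */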
	if (!netdev_xmit_more()) {
		/* Create memory barrier before write to TX PI */
		wmb();
		writeq(priv->tx_pi, priv->base + MLXBF_GIGE_TX_PRODUCER_INDEX);
	}

	/* Check if the last TX entry was just used */
	if (!mlxbf_gige_tx_buffs_avail(priv)) {
		/* TX ring is full, inform stack */
		netif_stop_queue(netdev);

		/* Since there is no separate "TX complete" interrupt, need
		 * to explicitly schedule NAPI poll. This will trigger logic
		 * which processes TX completions, and will hopefully drain
		 * the TX ring allowing the TX queue to be awakened.
		 */
		napi_schedule(&priv->napi);
	}

	return NETDEV_TX_OK;
}