/*
 * Copyright 2012 Tilera Corporation. All Rights Reserved.
 *
 *   This program is free software; you can redistribute it and/or
 *   modify it under the terms of the GNU General Public License
 *   as published by the Free Software Foundation, version 2.
 *
 *   This program is distributed in the hope that it will be useful, but
 *   WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 *   NON INFRINGEMENT.  See the GNU General Public License for
 *   more details.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
#include <linux/sched.h>
#include <linux/kernel.h>	/* printk() */
#include <linux/slab.h>		/* kmalloc() */
#include <linux/errno.h>	/* error codes */
#include <linux/types.h>	/* size_t */
#include <linux/interrupt.h>
#include <linux/in.h>
#include <linux/irq.h>
#include <linux/netdevice.h>	/* struct device, and other headers */
#include <linux/etherdevice.h>	/* eth_type_trans */
#include <linux/skbuff.h>
#include <linux/ioctl.h>
#include <linux/cdev.h>
#include <linux/hugetlb.h>
#include <linux/in6.h>
#include <linux/timer.h>
#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/io.h>
#include <linux/ctype.h>
#include <linux/ip.h>
#include <linux/tcp.h>

#include <asm/checksum.h>
#include <asm/homecache.h>
#include <gxio/mpipe.h>
#include <arch/sim.h>

/* Default transmit lockup timeout period, in jiffies. */
#define TILE_NET_TIMEOUT (5 * HZ)

/* The maximum number of distinct channels (idesc.channel is 5 bits). */
#define TILE_NET_CHANNELS 32

/* Maximum number of idescs to handle per "poll". */
#define TILE_NET_BATCH 128

/* Maximum number of packets to handle per "poll". */
#define TILE_NET_WEIGHT 64

/* Number of entries in each iqueue. */
#define IQUEUE_ENTRIES 512

/* Number of entries in each equeue. */
#define EQUEUE_ENTRIES 2048

/* Total header bytes per equeue slot.  Must be big enough for 2 bytes
 * of NET_IP_ALIGN alignment, plus 14 bytes of Ethernet (L2) header, plus
 * up to 60 bytes of actual TCP header.  We round up to align to cache
 * lines.
 */
#define HEADER_BYTES 128
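/* For example: 2 (NET_IP_ALIGN) + 14 (Ethernet) + 60 (TCP with options)
 * = 76 bytes, rounded up to 128 so each slot spans two 64-byte cache lines.
 */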

/* Maximum completions per cpu per device (must be a power of two).
 * ISSUE: What is the right number here?  If this is too small, then
 * egress might block waiting for free space in a completions array.
 * ISSUE: At the least, allocate these only for initialized echannels.
 */
#define TILE_NET_MAX_COMPS 64

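/* One slot beyond MAX_SKB_FRAGS: the extra entry covers the skb's
 * linear (header) data, which is egressed alongside the page fragments.
 */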
#define MAX_FRAGS (MAX_SKB_FRAGS + 1)

/* The "kinds" of buffer stacks (small/large/jumbo). */
#define MAX_KINDS 3

/* Size of completions data to allocate.
 * ISSUE: Probably more than needed since we don't use all the channels.
 */
#define COMPS_SIZE (TILE_NET_CHANNELS * sizeof(struct tile_net_comps))

/* Size of NotifRing data to allocate. */
#define NOTIF_RING_SIZE (IQUEUE_ENTRIES * sizeof(gxio_mpipe_idesc_t))

/* Timeout to wake the per-device TX timer after we stop the queue.
 * We don't want the timeout too short (adds overhead, and might end
 * up causing stop/wake/stop/wake cycles) or too long (affects performance).
 * For the 10 Gb NIC, 30 usec means roughly 30+ 1500-byte packets.
 */
#define TX_TIMER_DELAY_USEC 30

/* Timeout to wake the per-cpu egress timer to free completions. */
#define EGRESS_TIMER_DELAY_USEC 1000

MODULE_AUTHOR("Tilera Corporation");
MODULE_LICENSE("GPL");

/* A "packet fragment" (a chunk of memory). */
struct frag {
	void *buf;
	size_t length;
};

/* A single completion. */
struct tile_net_comp {
	/* The "complete_count" when the completion will be complete. */
	s64 when;
	/* The buffer to be freed when the completion is complete. */
	struct sk_buff *skb;
};

/* The completions for a given cpu and echannel. */
struct tile_net_comps {
	/* The completions. */
	struct tile_net_comp comp_queue[TILE_NET_MAX_COMPS];
	/* The number of completions used. */
	unsigned long comp_next;
	/* The number of completions freed. */
	unsigned long comp_last;
};

/* The transmit wake timer for a given cpu and echannel. */
struct tile_net_tx_wake {
	int tx_queue_idx;
	struct hrtimer timer;
	struct net_device *dev;
};

/* Info for a specific cpu. */
struct tile_net_info {
	/* Our cpu. */
	int my_cpu;
	/* A timer for handling egress completions. */
	struct hrtimer egress_timer;
	/* True if "egress_timer" is scheduled. */
	bool egress_timer_scheduled;
	struct info_mpipe {
		/* Packet queue. */
		gxio_mpipe_iqueue_t iqueue;
		/* The NAPI struct. */
		struct napi_struct napi;
		/* Number of buffers (by kind) which must still be provided. */
		unsigned int num_needed_buffers[MAX_KINDS];
		/* instance id. */
		int instance;
		/* True if iqueue is valid. */
		bool has_iqueue;
		/* NAPI flags. */
		bool napi_added;
		bool napi_enabled;
		/* Comps for each egress channel. */
		struct tile_net_comps *comps_for_echannel[TILE_NET_CHANNELS];
		/* Transmit wake timer for each egress channel. */
		struct tile_net_tx_wake tx_wake[TILE_NET_CHANNELS];
	} mpipe[NR_MPIPE_MAX];
};

/* Info for egress on a particular egress channel. */
struct tile_net_egress {
	/* The "equeue". */
	gxio_mpipe_equeue_t *equeue;
	/* The headers for TSO. */
	unsigned char *headers;
};

/* Info for a specific device. */
struct tile_net_priv {
	/* Our network device. */
	struct net_device *dev;
	/* The primary link. */
	gxio_mpipe_link_t link;
	/* The primary channel, if open, else -1. */
	int channel;
	/* The "loopify" egress link, if needed. */
	gxio_mpipe_link_t loopify_link;
	/* The "loopify" egress channel, if open, else -1. */
	int loopify_channel;
	/* The egress channel (channel or loopify_channel). */
	int echannel;
	/* mPIPE instance, 0 or 1. */
	int instance;
};

static struct mpipe_data {
	/* The ingress irq. */
	int ingress_irq;

	/* The "context" for all devices. */
	gxio_mpipe_context_t context;

	/* Egress info, indexed by "priv->echannel"
	 * (lazily created as needed).
	 */
	struct tile_net_egress
	egress_for_echannel[TILE_NET_CHANNELS];

	/* Devices currently associated with each channel.
	 * NOTE: The array entry can become NULL after ifconfig down, but
	 * we do not free the underlying net_device structures, so it is
	 * safe to use a pointer after reading it from this array.
	 */
	struct net_device
	*tile_net_devs_for_channel[TILE_NET_CHANNELS];

	/* The actual memory allocated for the buffer stacks. */
	void *buffer_stack_vas[MAX_KINDS];

	/* The amount of memory allocated for each buffer stack. */
	size_t buffer_stack_bytes[MAX_KINDS];

	/* The first buffer stack index
	 * (small = +0, large = +1, jumbo = +2).
	 */
	int first_buffer_stack;

	/* The buckets. */
	int first_bucket;
	int num_buckets;

} mpipe_data[NR_MPIPE_MAX] = {
	[0 ... (NR_MPIPE_MAX - 1)] {
		.ingress_irq = -1,
		.first_buffer_stack = -1,
		.first_bucket = -1,
		.num_buckets = 1
	}
};

/* A mutex for "tile_net_devs_for_channel". */
static DEFINE_MUTEX(tile_net_devs_for_channel_mutex);

/* The per-cpu info. */
static DEFINE_PER_CPU(struct tile_net_info, per_cpu_info);


/* The buffer size enums for each buffer stack.
 * See arch/tile/include/gxio/mpipe.h for the set of possible values.
 * We avoid the "10368" size because it can induce "false chaining"
 * on "cut-through" jumbo packets.
 */
static gxio_mpipe_buffer_size_enum_t buffer_size_enums[MAX_KINDS] = {
	GXIO_MPIPE_BUFFER_SIZE_128,
	GXIO_MPIPE_BUFFER_SIZE_1664,
	GXIO_MPIPE_BUFFER_SIZE_16384
};

/* Text value of tile_net.cpus if passed as a module parameter. */
static char *network_cpus_string;

/* The actual cpus in "network_cpus". */
static struct cpumask network_cpus_map;

/* If "loopify=LINK" was specified, this is "LINK". */
static char *loopify_link_name;

/* If "tile_net.custom" was specified, this is non-NULL. */
static char *custom_str;

/* If "tile_net.jumbo=NUM" was specified, this is "NUM". */
static uint jumbo_num;

/* Obtain mpipe instance from struct tile_net_priv given struct net_device. */
static inline int mpipe_instance(struct net_device *dev)
{
	struct tile_net_priv *priv = netdev_priv(dev);
	return priv->instance;
}

/* The "tile_net.cpus" argument specifies the cpus that are dedicated
 * to handle ingress packets.
 *
 * The parameter should be in the form "tile_net.cpus=m-n[,x-y]", where
 * m, n, x, y are integers identifying cpus, none of which may be a
 * dedicated cpu or a dataplane cpu.
 */
static bool network_cpus_init(void)
{
	char buf[1024];
	int rc;

	if (network_cpus_string == NULL)
		return false;

	rc = cpulist_parse_crop(network_cpus_string, &network_cpus_map);
	if (rc != 0) {
		pr_warn("tile_net.cpus=%s: malformed cpu list\n",
			network_cpus_string);
		return false;
	}

	/* Remove dedicated cpus. */
	cpumask_and(&network_cpus_map, &network_cpus_map, cpu_possible_mask);

	if (cpumask_empty(&network_cpus_map)) {
		pr_warn("Ignoring empty tile_net.cpus='%s'.\n",
			network_cpus_string);
		return false;
	}

	cpulist_scnprintf(buf, sizeof(buf), &network_cpus_map);
	pr_info("Linux network CPUs: %s\n", buf);
	return true;
}

module_param_named(cpus, network_cpus_string, charp, 0444);
MODULE_PARM_DESC(cpus, "cpulist of cores that handle network interrupts");

/* The "tile_net.loopify=LINK" argument causes the named device to
 * actually use "loop0" for ingress, and "loop1" for egress.  This
 * allows an app to sit between the actual link and linux, passing
 * (some) packets along to linux, and forwarding (some) packets sent
 * out by linux.
 */
module_param_named(loopify, loopify_link_name, charp, 0444);
MODULE_PARM_DESC(loopify, "name the device to use loop0/1 for ingress/egress");

/* The "tile_net.custom" argument causes us to ignore the "conventional"
 * classifier metadata, in particular, the "l2_offset".
 */
module_param_named(custom, custom_str, charp, 0444);
MODULE_PARM_DESC(custom, "indicates a (heavily) customized classifier");

/* The "tile_net.jumbo" argument causes us to support "jumbo" packets,
 * and to allocate the given number of "jumbo" buffers.
 */
module_param_named(jumbo, jumbo_num, uint, 0444);
MODULE_PARM_DESC(jumbo, "the number of buffers to support jumbo packets");
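
/* Example (illustrative values only): booting with
 * "tile_net.cpus=4-7 tile_net.jumbo=128" dedicates cpus 4-7 to network
 * ingress and provides 128 jumbo buffers.
 */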

/* Atomically update a statistics field.
 * Note that on TILE-Gx, this operation is fire-and-forget on the
 * issuing core (single-cycle dispatch) and takes only a few cycles
 * longer than a regular store when the request reaches the home cache.
 * No expensive bus management overhead is required.
 */
static void tile_net_stats_add(unsigned long value, unsigned long *field)
{
	BUILD_BUG_ON(sizeof(atomic_long_t) != sizeof(unsigned long));
	atomic_long_add(value, (atomic_long_t *)field);
}
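
/* Typical use, as in the ingress path below:
 *   tile_net_stats_add(1, &dev->stats.rx_packets);
 */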

/* Allocate and push a buffer. */
static bool tile_net_provide_buffer(int instance, int kind)
{
	struct mpipe_data *md = &mpipe_data[instance];
	gxio_mpipe_buffer_size_enum_t bse = buffer_size_enums[kind];
	size_t bs = gxio_mpipe_buffer_size_enum_to_buffer_size(bse);
	const unsigned long buffer_alignment = 128;
	struct sk_buff *skb;
	int len;

	len = sizeof(struct sk_buff **) + buffer_alignment + bs;
	skb = dev_alloc_skb(len);
	if (skb == NULL)
		return false;

	/* Make room for a back-pointer to 'skb' and guarantee alignment. */
	skb_reserve(skb, sizeof(struct sk_buff **));
	skb_reserve(skb, -(long)skb->data & (buffer_alignment - 1));

	/* Save a back-pointer to 'skb'. */
	*(struct sk_buff **)(skb->data - sizeof(struct sk_buff **)) = skb;
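
	/* Resulting layout (a sketch): the word immediately before the
	 * 128-byte-aligned skb->data holds the back-pointer, which
	 * mpipe_buf_to_skb() below uses to recover "skb" from the raw VA.
	 */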

	/* Make sure "skb" and the back-pointer have been flushed. */
	wmb();

	gxio_mpipe_push_buffer(&md->context, md->first_buffer_stack + kind,
			       (void *)va_to_tile_io_addr(skb->data));

	return true;
}

/* Convert a raw mpipe buffer to its matching skb pointer. */
static struct sk_buff *mpipe_buf_to_skb(void *va)
{
	/* Acquire the associated "skb". */
	struct sk_buff **skb_ptr = va - sizeof(*skb_ptr);
	struct sk_buff *skb = *skb_ptr;

	/* Paranoia. */
	if (skb->data != va) {
		/* Panic here since there's a reasonable chance
		 * that corrupt buffers means generic memory
		 * corruption, with unpredictable system effects.
		 */
		panic("Corrupt linux buffer! va=%p, skb=%p, skb->data=%p",
		      va, skb, skb->data);
	}

	return skb;
}

static void tile_net_pop_all_buffers(int instance, int stack)
{
	struct mpipe_data *md = &mpipe_data[instance];

	for (;;) {
		tile_io_addr_t addr =
			(tile_io_addr_t)gxio_mpipe_pop_buffer(&md->context,
							      stack);
		if (addr == 0)
			break;
		dev_kfree_skb_irq(mpipe_buf_to_skb(tile_io_addr_to_va(addr)));
	}
}

/* Provide linux buffers to mPIPE. */
static void tile_net_provide_needed_buffers(void)
{
	struct tile_net_info *info = &__get_cpu_var(per_cpu_info);
	int instance, kind;
	for (instance = 0; instance < NR_MPIPE_MAX &&
		     info->mpipe[instance].has_iqueue; instance++) {
		for (kind = 0; kind < MAX_KINDS; kind++) {
			while (info->mpipe[instance].num_needed_buffers[kind]
			       != 0) {
				if (!tile_net_provide_buffer(instance, kind)) {
					pr_notice("Tile %d still needs"
						  " some buffers\n",
						  info->my_cpu);
					return;
				}
				info->mpipe[instance].
					num_needed_buffers[kind]--;
			}
		}
	}
}

static inline bool filter_packet(struct net_device *dev, void *buf)
{
	/* Filter packets received before we're up. */
	if (dev == NULL || !(dev->flags & IFF_UP))
		return true;

	/* Filter out packets that aren't for us. */
	if (!(dev->flags & IFF_PROMISC) &&
	    !is_multicast_ether_addr(buf) &&
	    compare_ether_addr(dev->dev_addr, buf) != 0)
		return true;

	return false;
}

static void tile_net_receive_skb(struct net_device *dev, struct sk_buff *skb,
				 gxio_mpipe_idesc_t *idesc, unsigned long len)
{
	struct tile_net_info *info = &__get_cpu_var(per_cpu_info);
	int instance = mpipe_instance(dev);

	/* Encode the actual packet length. */
	skb_put(skb, len);

	skb->protocol = eth_type_trans(skb, dev);

	/* Acknowledge "good" hardware checksums. */
	if (idesc->cs && idesc->csum_seed_val == 0xFFFF)
		skb->ip_summed = CHECKSUM_UNNECESSARY;

	napi_gro_receive(&info->mpipe[instance].napi, skb);

	/* Update stats. */
	tile_net_stats_add(1, &dev->stats.rx_packets);
	tile_net_stats_add(len, &dev->stats.rx_bytes);

	/* Need a new buffer. */
	if (idesc->size == buffer_size_enums[0])
		info->mpipe[instance].num_needed_buffers[0]++;
	else if (idesc->size == buffer_size_enums[1])
		info->mpipe[instance].num_needed_buffers[1]++;
	else
		info->mpipe[instance].num_needed_buffers[2]++;
}

/* Handle a packet.  Return true if "processed", false if "filtered". */
static bool tile_net_handle_packet(int instance, gxio_mpipe_idesc_t *idesc)
{
	struct tile_net_info *info = &__get_cpu_var(per_cpu_info);
	struct mpipe_data *md = &mpipe_data[instance];
	struct net_device *dev = md->tile_net_devs_for_channel[idesc->channel];
	uint8_t l2_offset;
	void *va;
	void *buf;
	unsigned long len;
	bool filter;

	/* Drop packets for which no buffer was available (which can
	 * happen under heavy load), or for which the me/tr/ce flags
	 * are set (which can happen for jumbo cut-through packets,
	 * or with a customized classifier).
	 */
	if (idesc->be || idesc->me || idesc->tr || idesc->ce) {
		if (dev)
			tile_net_stats_add(1, &dev->stats.rx_errors);
		goto drop;
	}

	/* Get the "l2_offset", if allowed. */
	l2_offset = custom_str ? 0 : gxio_mpipe_idesc_get_l2_offset(idesc);

	/* Get the VA (including NET_IP_ALIGN bytes of "headroom"). */
	va = tile_io_addr_to_va((unsigned long)idesc->va);

	/* Get the actual packet start/length. */
	buf = va + l2_offset;
	len = idesc->l2_size - l2_offset;

	/* Point "va" at the raw buffer. */
	va -= NET_IP_ALIGN;

	filter = filter_packet(dev, buf);
	if (filter) {
		if (dev)
			tile_net_stats_add(1, &dev->stats.rx_dropped);
drop:
		gxio_mpipe_iqueue_drop(&info->mpipe[instance].iqueue, idesc);
	} else {
		struct sk_buff *skb = mpipe_buf_to_skb(va);

		/* Skip headroom, and any custom header. */
		skb_reserve(skb, NET_IP_ALIGN + l2_offset);

		tile_net_receive_skb(dev, skb, idesc, len);
	}

	gxio_mpipe_iqueue_consume(&info->mpipe[instance].iqueue, idesc);
	return !filter;
}

/* Handle some packets for the current CPU.
 *
 * This function handles up to TILE_NET_BATCH idescs per call.
 *
 * ISSUE: Since we do not provide new buffers until this function is
 * complete, we must initially provide enough buffers for each network
 * cpu to fill its iqueue and also its batched idescs.
 *
 * ISSUE: The "rotting packet" race condition occurs if a packet
 * arrives after the queue appears to be empty, and before the
 * hypervisor interrupt is re-enabled.
 */
static int tile_net_poll(struct napi_struct *napi, int budget)
{
	struct tile_net_info *info = &__get_cpu_var(per_cpu_info);
	unsigned int work = 0;
	gxio_mpipe_idesc_t *idesc;
	int instance, i, n;
	struct mpipe_data *md;
	struct info_mpipe *info_mpipe =
		container_of(napi, struct info_mpipe, napi);

	instance = info_mpipe->instance;
	while ((n = gxio_mpipe_iqueue_try_peek(
			&info_mpipe->iqueue,
			&idesc)) > 0) {
		for (i = 0; i < n; i++) {
			if (i == TILE_NET_BATCH)
				goto done;
			if (tile_net_handle_packet(instance,
						   idesc + i)) {
				if (++work >= budget)
					goto done;
			}
		}
	}

	/* There are no packets left. */
	napi_complete(&info_mpipe->napi);

	md = &mpipe_data[instance];
	/* Re-enable hypervisor interrupts. */
	gxio_mpipe_enable_notif_ring_interrupt(
		&md->context, info->mpipe[instance].iqueue.ring);

	/* HACK: Avoid the "rotting packet" problem. */
	if (gxio_mpipe_iqueue_try_peek(&info_mpipe->iqueue, &idesc) > 0)
		napi_schedule(&info_mpipe->napi);

	/* ISSUE: Handle completions? */

done:
	tile_net_provide_needed_buffers();

	return work;
}

/* Handle an ingress interrupt from an instance on the current cpu. */
static irqreturn_t tile_net_handle_ingress_irq(int irq, void *id)
{
	struct tile_net_info *info = &__get_cpu_var(per_cpu_info);
	napi_schedule(&info->mpipe[(uint64_t)id].napi);
	return IRQ_HANDLED;
}

/* Free some completions.  This must be called with interrupts blocked. */
static int tile_net_free_comps(gxio_mpipe_equeue_t *equeue,
			       struct tile_net_comps *comps,
			       int limit, bool force_update)
{
	int n = 0;
	while (comps->comp_last < comps->comp_next) {
		unsigned int cid = comps->comp_last % TILE_NET_MAX_COMPS;
		struct tile_net_comp *comp = &comps->comp_queue[cid];
		if (!gxio_mpipe_equeue_is_complete(equeue, comp->when,
						   force_update || n == 0))
			break;
		dev_kfree_skb_irq(comp->skb);
		comps->comp_last++;
		if (++n == limit)
			break;
	}
	return n;
}

/* Add a completion.  This must be called with interrupts blocked.
 * tile_net_equeue_try_reserve() will have ensured a free completion entry.
 */
static void add_comp(gxio_mpipe_equeue_t *equeue,
		     struct tile_net_comps *comps,
		     uint64_t when, struct sk_buff *skb)
{
	int cid = comps->comp_next % TILE_NET_MAX_COMPS;
	comps->comp_queue[cid].when = when;
	comps->comp_queue[cid].skb = skb;
	comps->comp_next++;
}
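
/* Note: comp_next and comp_last are free-running counters; the check in
 * tile_net_equeue_try_reserve() (below) keeps their difference under
 * TILE_NET_MAX_COMPS, so the comp_queue ring cannot overflow.
 */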

static void tile_net_schedule_tx_wake_timer(struct net_device *dev,
					    int tx_queue_idx)
{
	struct tile_net_info *info = &per_cpu(per_cpu_info, tx_queue_idx);
	struct tile_net_priv *priv = netdev_priv(dev);
	int instance = priv->instance;
	struct tile_net_tx_wake *tx_wake =
		&info->mpipe[instance].tx_wake[priv->echannel];

	hrtimer_start(&tx_wake->timer,
		      ktime_set(0, TX_TIMER_DELAY_USEC * 1000UL),
		      HRTIMER_MODE_REL_PINNED);
}

static enum hrtimer_restart tile_net_handle_tx_wake_timer(struct hrtimer *t)
{
	struct tile_net_tx_wake *tx_wake =
		container_of(t, struct tile_net_tx_wake, timer);
	netif_wake_subqueue(tx_wake->dev, tx_wake->tx_queue_idx);
	return HRTIMER_NORESTART;
}

/* Make sure the egress timer is scheduled. */
static void tile_net_schedule_egress_timer(void)
{
	struct tile_net_info *info = &__get_cpu_var(per_cpu_info);

	if (!info->egress_timer_scheduled) {
		hrtimer_start(&info->egress_timer,
			      ktime_set(0, EGRESS_TIMER_DELAY_USEC * 1000UL),
			      HRTIMER_MODE_REL_PINNED);
		info->egress_timer_scheduled = true;
	}
}

/* The "function" for "info->egress_timer".
 *
 * This timer will reschedule itself as long as there are any pending
 * completions expected for this tile.
 */
static enum hrtimer_restart tile_net_handle_egress_timer(struct hrtimer *t)
{
	struct tile_net_info *info = &__get_cpu_var(per_cpu_info);
	unsigned long irqflags;
	bool pending = false;
	int i, instance;

	local_irq_save(irqflags);

	/* The timer is no longer scheduled. */
	info->egress_timer_scheduled = false;

	/* Free all possible comps for this tile. */
	for (instance = 0; instance < NR_MPIPE_MAX &&
		     info->mpipe[instance].has_iqueue; instance++) {
		for (i = 0; i < TILE_NET_CHANNELS; i++) {
			struct tile_net_egress *egress =
				&mpipe_data[instance].egress_for_echannel[i];
			struct tile_net_comps *comps =
				info->mpipe[instance].comps_for_echannel[i];
			if (!egress || comps->comp_last >= comps->comp_next)
				continue;
			tile_net_free_comps(egress->equeue, comps, -1, true);
			pending = pending ||
				(comps->comp_last < comps->comp_next);
		}
	}

	/* Reschedule timer if needed. */
	if (pending)
		tile_net_schedule_egress_timer();

	local_irq_restore(irqflags);

	return HRTIMER_NORESTART;
}

/* Helper functions for "tile_net_update()". */
static void enable_ingress_irq(void *irq)
{
	enable_percpu_irq((long)irq, 0);
}

static void disable_ingress_irq(void *irq)
{
	disable_percpu_irq((long)irq);
}

/* Helper function for tile_net_open() and tile_net_stop().
 * Always called under tile_net_devs_for_channel_mutex.
 */
static int tile_net_update(struct net_device *dev)
{
	static gxio_mpipe_rules_t rules;  /* too big to fit on the stack */
	bool saw_channel = false;
	int instance = mpipe_instance(dev);
	struct mpipe_data *md = &mpipe_data[instance];
	int channel;
	int rc;
	int cpu;

	saw_channel = false;
	gxio_mpipe_rules_init(&rules, &md->context);

	for (channel = 0; channel < TILE_NET_CHANNELS; channel++) {
		if (md->tile_net_devs_for_channel[channel] == NULL)
			continue;
		if (!saw_channel) {
			saw_channel = true;
			gxio_mpipe_rules_begin(&rules, md->first_bucket,
					       md->num_buckets, NULL);
			gxio_mpipe_rules_set_headroom(&rules, NET_IP_ALIGN);
		}
		gxio_mpipe_rules_add_channel(&rules, channel);
	}

	/* NOTE: This can fail if there is no classifier.
	 * ISSUE: Can anything else cause it to fail?
	 */
	rc = gxio_mpipe_rules_commit(&rules);
	if (rc != 0) {
		netdev_warn(dev, "gxio_mpipe_rules_commit: mpipe[%d] %d\n",
			    instance, rc);
		return -EIO;
	}

	/* Update all cpus, sequentially (to protect "netif_napi_add()").
	 * We use on_each_cpu to handle the IPI mask or unmask.
	 */
	if (!saw_channel)
		on_each_cpu(disable_ingress_irq,
			    (void *)(long)(md->ingress_irq), 1);
	for_each_online_cpu(cpu) {
		struct tile_net_info *info = &per_cpu(per_cpu_info, cpu);

		if (!info->mpipe[instance].has_iqueue)
			continue;
		if (saw_channel) {
			if (!info->mpipe[instance].napi_added) {
				netif_napi_add(dev, &info->mpipe[instance].napi,
					       tile_net_poll, TILE_NET_WEIGHT);
				info->mpipe[instance].napi_added = true;
			}
			if (!info->mpipe[instance].napi_enabled) {
				napi_enable(&info->mpipe[instance].napi);
				info->mpipe[instance].napi_enabled = true;
			}
		} else {
			if (info->mpipe[instance].napi_enabled) {
				napi_disable(&info->mpipe[instance].napi);
				info->mpipe[instance].napi_enabled = false;
			}
			/* FIXME: Drain the iqueue. */
		}
	}
	if (saw_channel)
		on_each_cpu(enable_ingress_irq,
			    (void *)(long)(md->ingress_irq), 1);

	/* HACK: Allow packets to flow in the simulator. */
	if (saw_channel)
		sim_enable_mpipe_links(instance, -1);

	return 0;
}

/* Initialize a buffer stack. */
static int create_buffer_stack(struct net_device *dev,
			       int kind, size_t num_buffers)
{
	pte_t hash_pte = pte_set_home((pte_t) { 0 }, PAGE_HOME_HASH);
	int instance = mpipe_instance(dev);
	struct mpipe_data *md = &mpipe_data[instance];
	size_t needed = gxio_mpipe_calc_buffer_stack_bytes(num_buffers);
	int stack_idx = md->first_buffer_stack + kind;
	void *va;
	int i, rc;

	/* Round up to 64KB and then use alloc_pages() so we get the
	 * required 64KB alignment.
	 */
	md->buffer_stack_bytes[kind] =
		ALIGN(needed, 64 * 1024);

	va = alloc_pages_exact(md->buffer_stack_bytes[kind], GFP_KERNEL);
	if (va == NULL) {
		netdev_err(dev,
			   "Could not alloc %zd bytes for buffer stack %d\n",
			   md->buffer_stack_bytes[kind], kind);
		return -ENOMEM;
	}

	/* Initialize the buffer stack. */
	rc = gxio_mpipe_init_buffer_stack(&md->context, stack_idx,
					  buffer_size_enums[kind], va,
					  md->buffer_stack_bytes[kind], 0);
	if (rc != 0) {
		netdev_err(dev, "gxio_mpipe_init_buffer_stack: mpipe[%d] %d\n",
			   instance, rc);
		free_pages_exact(va, md->buffer_stack_bytes[kind]);
		return rc;
	}

	md->buffer_stack_vas[kind] = va;

	rc = gxio_mpipe_register_client_memory(&md->context, stack_idx,
					       hash_pte, 0);
	if (rc != 0) {
		netdev_err(dev,
			   "gxio_mpipe_register_client_memory: mpipe[%d] %d\n",
			   instance, rc);
		return rc;
	}

	/* Provide initial buffers. */
	for (i = 0; i < num_buffers; i++) {
		if (!tile_net_provide_buffer(instance, kind)) {
			netdev_err(dev, "Cannot allocate initial sk_bufs!\n");
			return -ENOMEM;
		}
	}

	return 0;
}

/* Allocate and initialize mpipe buffer stacks, and register them in
 * the mPIPE TLBs, for small, large, and (possibly) jumbo packet sizes.
 * This routine supports tile_net_init_mpipe(), below.
 */
static int init_buffer_stacks(struct net_device *dev,
			      int network_cpus_count)
{
	int num_kinds = MAX_KINDS - (jumbo_num == 0);
	size_t num_buffers;
	int rc;
	int instance = mpipe_instance(dev);
	struct mpipe_data *md = &mpipe_data[instance];

	/* Allocate the buffer stacks. */
	rc = gxio_mpipe_alloc_buffer_stacks(&md->context, num_kinds, 0, 0);
	if (rc < 0) {
		netdev_err(dev,
			   "gxio_mpipe_alloc_buffer_stacks: mpipe[%d] %d\n",
			   instance, rc);
		return rc;
	}
	md->first_buffer_stack = rc;

	/* Enough small/large buffers to (normally) avoid buffer errors. */
	num_buffers =
		network_cpus_count * (IQUEUE_ENTRIES + TILE_NET_BATCH);

	/* Allocate the small memory stack. */
	if (rc >= 0)
		rc = create_buffer_stack(dev, 0, num_buffers);

	/* Allocate the large buffer stack. */
	if (rc >= 0)
		rc = create_buffer_stack(dev, 1, num_buffers);

	/* Allocate the jumbo buffer stack if needed. */
	if (rc >= 0 && jumbo_num != 0)
		rc = create_buffer_stack(dev, 2, jumbo_num);

	return rc;
}

/* Allocate per-cpu resources (memory for completions and idescs).
 * This routine supports tile_net_init_mpipe(), below.
 */
static int alloc_percpu_mpipe_resources(struct net_device *dev,
					int cpu, int ring)
{
	struct tile_net_info *info = &per_cpu(per_cpu_info, cpu);
	int order, i, rc;
	int instance = mpipe_instance(dev);
	struct mpipe_data *md = &mpipe_data[instance];
	struct page *page;
	void *addr;

	/* Allocate the "comps". */
	order = get_order(COMPS_SIZE);
	page = homecache_alloc_pages(GFP_KERNEL, order, cpu);
	if (page == NULL) {
		netdev_err(dev, "Failed to alloc %zd bytes comps memory\n",
			   COMPS_SIZE);
		return -ENOMEM;
	}
	addr = pfn_to_kaddr(page_to_pfn(page));
	memset(addr, 0, COMPS_SIZE);
	for (i = 0; i < TILE_NET_CHANNELS; i++)
		info->mpipe[instance].comps_for_echannel[i] =
			addr + i * sizeof(struct tile_net_comps);

	/* If this is a network cpu, create an iqueue. */
	if (cpu_isset(cpu, network_cpus_map)) {
		order = get_order(NOTIF_RING_SIZE);
		page = homecache_alloc_pages(GFP_KERNEL, order, cpu);
		if (page == NULL) {
			netdev_err(dev,
				   "Failed to alloc %zd bytes iqueue memory\n",
				   NOTIF_RING_SIZE);
			return -ENOMEM;
		}
		addr = pfn_to_kaddr(page_to_pfn(page));
		rc = gxio_mpipe_iqueue_init(&info->mpipe[instance].iqueue,
					    &md->context, ring++, addr,
					    NOTIF_RING_SIZE, 0);
		if (rc < 0) {
			netdev_err(dev,
				   "gxio_mpipe_iqueue_init failed: %d\n", rc);
			return rc;
		}
		info->mpipe[instance].has_iqueue = true;
	}

	return ring;
}

/* Initialize NotifGroup and buckets.
 * This routine supports tile_net_init_mpipe(), below.
 */
static int init_notif_group_and_buckets(struct net_device *dev,
					int ring, int network_cpus_count)
{
	int group, rc;
	int instance = mpipe_instance(dev);
	struct mpipe_data *md = &mpipe_data[instance];

	/* Allocate one NotifGroup. */
	rc = gxio_mpipe_alloc_notif_groups(&md->context, 1, 0, 0);
	if (rc < 0) {
		netdev_err(dev, "gxio_mpipe_alloc_notif_groups: mpipe[%d] %d\n",
			   instance, rc);
		return rc;
	}
	group = rc;

	/* Initialize global num_buckets value. */
	if (network_cpus_count > 4)
		md->num_buckets = 256;
	else if (network_cpus_count > 1)
		md->num_buckets = 16;

	/* Allocate some buckets, and set global first_bucket value. */
	rc = gxio_mpipe_alloc_buckets(&md->context, md->num_buckets, 0, 0);
	if (rc < 0) {
		netdev_err(dev, "gxio_mpipe_alloc_buckets: mpipe[%d] %d\n",
			   instance, rc);
		return rc;
	}
	md->first_bucket = rc;

	/* Init group and buckets. */
	rc = gxio_mpipe_init_notif_group_and_buckets(
		&md->context, group, ring, network_cpus_count,
		md->first_bucket, md->num_buckets,
		GXIO_MPIPE_BUCKET_STICKY_FLOW_LOCALITY);
	if (rc != 0) {
		netdev_err(dev, "gxio_mpipe_init_notif_group_and_buckets: "
			   "mpipe[%d] %d\n", instance, rc);
		return rc;
	}

	return 0;
}

/* Create an irq and register it, then activate the irq and request
 * interrupts on all cores.  Note that "ingress_irq" being initialized
 * is how we know not to call tile_net_init_mpipe() again.
 * This routine supports tile_net_init_mpipe(), below.
 */
static int tile_net_setup_interrupts(struct net_device *dev)
{
	int cpu, rc, irq;
	int instance = mpipe_instance(dev);
	struct mpipe_data *md = &mpipe_data[instance];

	irq = md->ingress_irq;
	if (irq < 0) {
		irq = create_irq();
		if (irq < 0) {
			netdev_err(dev,
				   "create_irq failed: mpipe[%d] %d\n",
				   instance, irq);
			return irq;
		}
		tile_irq_activate(irq, TILE_IRQ_PERCPU);

		rc = request_irq(irq, tile_net_handle_ingress_irq,
				 0, "tile_net", (void *)((uint64_t)instance));

		if (rc != 0) {
			netdev_err(dev, "request_irq failed: mpipe[%d] %d\n",
				   instance, rc);
			destroy_irq(irq);
			return rc;
		}
		md->ingress_irq = irq;
	}

	for_each_online_cpu(cpu) {
		struct tile_net_info *info = &per_cpu(per_cpu_info, cpu);
		if (info->mpipe[instance].has_iqueue) {
			gxio_mpipe_request_notif_ring_interrupt(&md->context,
				cpu_x(cpu), cpu_y(cpu), KERNEL_PL, irq,
				info->mpipe[instance].iqueue.ring);
		}
	}

	return 0;
}

/* Undo any state set up partially by a failed call to tile_net_init_mpipe. */
static void tile_net_init_mpipe_fail(int instance)
{
	int kind, cpu;
	struct mpipe_data *md = &mpipe_data[instance];

	/* Do cleanups that require the mpipe context first. */
	for (kind = 0; kind < MAX_KINDS; kind++) {
		if (md->buffer_stack_vas[kind] != NULL) {
			tile_net_pop_all_buffers(instance,
						 md->first_buffer_stack +
						 kind);
		}
	}

	/* Destroy mpipe context so the hardware no longer owns any memory. */
	gxio_mpipe_destroy(&md->context);

	for_each_online_cpu(cpu) {
		struct tile_net_info *info = &per_cpu(per_cpu_info, cpu);
		free_pages(
			(unsigned long)(
				info->mpipe[instance].comps_for_echannel[0]),
			get_order(COMPS_SIZE));
		info->mpipe[instance].comps_for_echannel[0] = NULL;
		free_pages((unsigned long)(info->mpipe[instance].iqueue.idescs),
			   get_order(NOTIF_RING_SIZE));
		info->mpipe[instance].iqueue.idescs = NULL;
	}

	for (kind = 0; kind < MAX_KINDS; kind++) {
		if (md->buffer_stack_vas[kind] != NULL) {
			free_pages_exact(md->buffer_stack_vas[kind],
					 md->buffer_stack_bytes[kind]);
			md->buffer_stack_vas[kind] = NULL;
		}
	}

	md->first_buffer_stack = -1;
	md->first_bucket = -1;
}

/* The first time any tilegx network device is opened, we initialize
 * the global mpipe state.  If this step fails, we fail to open the
 * device, but if it succeeds, we never need to do it again, and since
 * tile_net can't be unloaded, we never undo it.
 *
 * Note that some resources in this path (buffer stack indices,
 * bindings from init_buffer_stack, etc.) are hypervisor resources
 * that are freed implicitly by gxio_mpipe_destroy().
 */
static int tile_net_init_mpipe(struct net_device *dev)
{
	int rc;
	int cpu;
	int first_ring, ring;
	int instance = mpipe_instance(dev);
	struct mpipe_data *md = &mpipe_data[instance];
	int network_cpus_count = cpus_weight(network_cpus_map);

	if (!hash_default) {
		netdev_err(dev, "Networking requires hash_default!\n");
		return -EIO;
	}

	rc = gxio_mpipe_init(&md->context, instance);
	if (rc != 0) {
		netdev_err(dev, "gxio_mpipe_init: mpipe[%d] %d\n",
			   instance, rc);
		return -EIO;
	}

	/* Set up the buffer stacks. */
	rc = init_buffer_stacks(dev, network_cpus_count);
	if (rc != 0)
		goto fail;

	/* Allocate one NotifRing for each network cpu. */
	rc = gxio_mpipe_alloc_notif_rings(&md->context,
					  network_cpus_count, 0, 0);
	if (rc < 0) {
		netdev_err(dev, "gxio_mpipe_alloc_notif_rings failed %d\n",
			   rc);
		goto fail;
	}

	/* Init NotifRings per-cpu. */
	first_ring = rc;
	ring = first_ring;
	for_each_online_cpu(cpu) {
		rc = alloc_percpu_mpipe_resources(dev, cpu, ring);
		if (rc < 0)
			goto fail;
		ring = rc;
	}

	/* Initialize NotifGroup and buckets. */
	rc = init_notif_group_and_buckets(dev, first_ring, network_cpus_count);
	if (rc != 0)
		goto fail;

	/* Create and enable interrupts. */
	rc = tile_net_setup_interrupts(dev);
	if (rc != 0)
		goto fail;

	return 0;

fail:
	tile_net_init_mpipe_fail(instance);
	return rc;
}

/* Create persistent egress info for a given egress channel.
 * Note that this may be shared between, say, "gbe0" and "xgbe0".
 * ISSUE: Defer header allocation until TSO is actually needed?
 */
static int tile_net_init_egress(struct net_device *dev, int echannel)
{
	static int ering = -1;
	struct page *headers_page, *edescs_page, *equeue_page;
	gxio_mpipe_edesc_t *edescs;
	gxio_mpipe_equeue_t *equeue;
	unsigned char *headers;
	int headers_order, edescs_order, equeue_order;
	size_t edescs_size;
	int rc = -ENOMEM;
	int instance = mpipe_instance(dev);
	struct mpipe_data *md = &mpipe_data[instance];

	/* Only initialize once. */
	if (md->egress_for_echannel[echannel].equeue != NULL)
		return 0;

	/* Allocate memory for the "headers". */
	headers_order = get_order(EQUEUE_ENTRIES * HEADER_BYTES);
	headers_page = alloc_pages(GFP_KERNEL, headers_order);
	if (headers_page == NULL) {
		netdev_warn(dev,
			    "Could not alloc %zd bytes for TSO headers.\n",
			    PAGE_SIZE << headers_order);
		goto fail;
	}
	headers = pfn_to_kaddr(page_to_pfn(headers_page));

	/* Allocate memory for the "edescs". */
	edescs_size = EQUEUE_ENTRIES * sizeof(*edescs);
	edescs_order = get_order(edescs_size);
	edescs_page = alloc_pages(GFP_KERNEL, edescs_order);
	if (edescs_page == NULL) {
		netdev_warn(dev,
			    "Could not alloc %zd bytes for eDMA ring.\n",
			    edescs_size);
		goto fail_headers;
	}
	edescs = pfn_to_kaddr(page_to_pfn(edescs_page));

	/* Allocate memory for the "equeue". */
	equeue_order = get_order(sizeof(*equeue));
	equeue_page = alloc_pages(GFP_KERNEL, equeue_order);
	if (equeue_page == NULL) {
		netdev_warn(dev,
			    "Could not alloc %zd bytes for equeue info.\n",
			    PAGE_SIZE << equeue_order);
		goto fail_edescs;
	}
	equeue = pfn_to_kaddr(page_to_pfn(equeue_page));

	/* Allocate an edma ring (using a one entry "free list"). */
	if (ering < 0) {
		rc = gxio_mpipe_alloc_edma_rings(&md->context, 1, 0, 0);
		if (rc < 0) {
			netdev_warn(dev, "gxio_mpipe_alloc_edma_rings: "
				    "mpipe[%d] %d\n", instance, rc);
			goto fail_equeue;
		}
		ering = rc;
	}

	/* Initialize the equeue. */
	rc = gxio_mpipe_equeue_init(equeue, &md->context, ering, echannel,
				    edescs, edescs_size, 0);
	if (rc != 0) {
		netdev_err(dev, "gxio_mpipe_equeue_init: mpipe[%d] %d\n",
			   instance, rc);
		goto fail_equeue;
	}

	/* Don't reuse the ering later. */
	ering = -1;

	if (jumbo_num != 0) {
		/* Make sure "jumbo" packets can be egressed safely. */
		if (gxio_mpipe_equeue_set_snf_size(equeue, 10368) < 0) {
			/* ISSUE: There is no "gxio_mpipe_equeue_destroy()". */
			netdev_warn(dev, "Jumbo packets may not be egressed"
				    " properly on channel %d\n", echannel);
		}
	}

	/* Done. */
	md->egress_for_echannel[echannel].equeue = equeue;
	md->egress_for_echannel[echannel].headers = headers;
	return 0;

fail_equeue:
	__free_pages(equeue_page, equeue_order);

fail_edescs:
	__free_pages(edescs_page, edescs_order);

fail_headers:
	__free_pages(headers_page, headers_order);

fail:
	return rc;
}

/* Return channel number for a newly-opened link. */
static int tile_net_link_open(struct net_device *dev, gxio_mpipe_link_t *link,
			      const char *link_name)
{
	int instance = mpipe_instance(dev);
	struct mpipe_data *md = &mpipe_data[instance];
	int rc = gxio_mpipe_link_open(link, &md->context, link_name, 0);
	if (rc < 0) {
		netdev_err(dev, "Failed to open '%s', mpipe[%d], %d\n",
			   link_name, instance, rc);
		return rc;
	}
	if (jumbo_num != 0) {
		u32 attr = GXIO_MPIPE_LINK_RECEIVE_JUMBO;
		rc = gxio_mpipe_link_set_attr(link, attr, 1);
		if (rc != 0) {
			netdev_err(dev,
				   "Cannot receive jumbo packets on '%s'\n",
				   link_name);
			gxio_mpipe_link_close(link);
			return rc;
		}
	}
	rc = gxio_mpipe_link_channel(link);
	if (rc < 0 || rc >= TILE_NET_CHANNELS) {
		netdev_err(dev, "gxio_mpipe_link_channel bad value: %d\n", rc);
		gxio_mpipe_link_close(link);
		return -EINVAL;
	}
	return rc;
}

/* Help the kernel activate the given network interface. */
static int tile_net_open(struct net_device *dev)
{
	struct tile_net_priv *priv = netdev_priv(dev);
	int cpu, rc, instance;

	mutex_lock(&tile_net_devs_for_channel_mutex);

	/* Get the instance info. */
	rc = gxio_mpipe_link_instance(dev->name);
	if (rc < 0 || rc >= NR_MPIPE_MAX) {
		mutex_unlock(&tile_net_devs_for_channel_mutex);
		return -EIO;
	}

	priv->instance = rc;
	instance = rc;
	if (!mpipe_data[rc].context.mmio_fast_base) {
		/* Do one-time initialization per instance the first time
		 * any device is opened.
		 */
		rc = tile_net_init_mpipe(dev);
		if (rc != 0)
			goto fail;
	}

	/* Determine if this is the "loopify" device. */
	if (unlikely((loopify_link_name != NULL) &&
		     !strcmp(dev->name, loopify_link_name))) {
		rc = tile_net_link_open(dev, &priv->link, "loop0");
		if (rc < 0)
			goto fail;
		priv->channel = rc;
		rc = tile_net_link_open(dev, &priv->loopify_link, "loop1");
		if (rc < 0)
			goto fail;
		priv->loopify_channel = rc;
		priv->echannel = rc;
	} else {
		rc = tile_net_link_open(dev, &priv->link, dev->name);
		if (rc < 0)
			goto fail;
		priv->channel = rc;
		priv->echannel = rc;
	}

	/* Initialize egress info (if needed).  Once ever, per echannel. */
	rc = tile_net_init_egress(dev, priv->echannel);
	if (rc != 0)
		goto fail;

	mpipe_data[instance].tile_net_devs_for_channel[priv->channel] = dev;

	rc = tile_net_update(dev);
	if (rc != 0)
		goto fail;

	mutex_unlock(&tile_net_devs_for_channel_mutex);

	/* Initialize the transmit wake timer for this device for each cpu. */
	for_each_online_cpu(cpu) {
		struct tile_net_info *info = &per_cpu(per_cpu_info, cpu);
		struct tile_net_tx_wake *tx_wake =
			&info->mpipe[instance].tx_wake[priv->echannel];

		hrtimer_init(&tx_wake->timer, CLOCK_MONOTONIC,
			     HRTIMER_MODE_REL);
		tx_wake->tx_queue_idx = cpu;
		tx_wake->timer.function = tile_net_handle_tx_wake_timer;
		tx_wake->dev = dev;
	}

	for_each_online_cpu(cpu)
		netif_start_subqueue(dev, cpu);
	netif_carrier_on(dev);
	return 0;

fail:
	if (priv->loopify_channel >= 0) {
		if (gxio_mpipe_link_close(&priv->loopify_link) != 0)
			netdev_warn(dev, "Failed to close loopify link!\n");
		priv->loopify_channel = -1;
	}
	if (priv->channel >= 0) {
		if (gxio_mpipe_link_close(&priv->link) != 0)
			netdev_warn(dev, "Failed to close link!\n");
		mpipe_data[instance].tile_net_devs_for_channel[priv->channel] =
			NULL;
		priv->channel = -1;
	}
	priv->echannel = -1;
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001383 mutex_unlock(&tile_net_devs_for_channel_mutex);
1384
1385 /* Don't return raw gxio error codes to generic Linux. */
1386 return (rc > -512) ? rc : -EIO;
1387}
1388
1389/* Help the kernel deactivate the given network interface. */
1390static int tile_net_stop(struct net_device *dev)
1391{
1392 struct tile_net_priv *priv = netdev_priv(dev);
1393 int cpu;
1394 int instance = priv->instance;
1395 struct mpipe_data *md = &mpipe_data[instance];
1396
1397 for_each_online_cpu(cpu) {
1398 struct tile_net_info *info = &per_cpu(per_cpu_info, cpu);
1399 struct tile_net_tx_wake *tx_wake =
1400 &info->mpipe[instance].tx_wake[priv->echannel];
1401
1402 hrtimer_cancel(&tx_wake->timer);
1403 netif_stop_subqueue(dev, cpu);
1404 }
1405
1406 mutex_lock(&tile_net_devs_for_channel_mutex);
1407 md->tile_net_devs_for_channel[priv->channel] = NULL;
1408 (void)tile_net_update(dev);
1409 if (priv->loopify_channel >= 0) {
1410 if (gxio_mpipe_link_close(&priv->loopify_link) != 0)
1411 netdev_warn(dev, "Failed to close loopify link!\n");
1412 priv->loopify_channel = -1;
1413 }
1414 if (priv->channel >= 0) {
1415 if (gxio_mpipe_link_close(&priv->link) != 0)
1416 netdev_warn(dev, "Failed to close link!\n");
1417 priv->channel = -1;
1418 }
1419 priv->echannel = -1;
1420 mutex_unlock(&tile_net_devs_for_channel_mutex);
1421
1422 return 0;
1423}
1424
1425/* Determine the VA for a fragment. */
1426static inline void *tile_net_frag_buf(skb_frag_t *f)
1427{
1428 unsigned long pfn = page_to_pfn(skb_frag_page(f));
1429 return pfn_to_kaddr(pfn) + f->page_offset;
1430}
1431
1432/* Acquire a completion entry and an egress slot, or if we can't,
1433 * stop the queue and schedule the tx_wake timer.
1434 */
1435static s64 tile_net_equeue_try_reserve(struct net_device *dev,
1436 int tx_queue_idx,
1437 struct tile_net_comps *comps,
1438 gxio_mpipe_equeue_t *equeue,
1439 int num_edescs)
1440{
1441 /* Try to acquire a completion entry. */
1442 if (comps->comp_next - comps->comp_last < TILE_NET_MAX_COMPS - 1 ||
1443 tile_net_free_comps(equeue, comps, 32, false) != 0) {
1444
1445 /* Try to acquire an egress slot. */
1446 s64 slot = gxio_mpipe_equeue_try_reserve(equeue, num_edescs);
1447 if (slot >= 0)
1448 return slot;
1449
1450 /* Freeing some completions gives the equeue time to drain. */
1451 tile_net_free_comps(equeue, comps, TILE_NET_MAX_COMPS, false);
1452
1453 slot = gxio_mpipe_equeue_try_reserve(equeue, num_edescs);
1454 if (slot >= 0)
1455 return slot;
1456 }
1457
1458 /* Still nothing; give up and stop the queue for a short while. */
1459 netif_stop_subqueue(dev, tx_queue_idx);
1460 tile_net_schedule_tx_wake_timer(dev, tx_queue_idx);
1461 return -1;
1462}
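
/*
 * Editor's note: an illustrative, user-space sketch (not driver code) of the
 * control flow above -- try to reserve, reclaim some completions and retry,
 * and only then stop the queue.  Every name below (toy_ring, toy_try_reserve,
 * toy_reclaim, the sizes) is invented for the example; the real equeue and
 * completion bookkeeping live in gxio_mpipe and in the functions above.
 */
#include <stdio.h>

struct toy_ring {
	int size;                      /* total egress slots */
	int reserved;                  /* handed out, not yet completed */
};

/* Reserve n slots, or return -1 if the ring is currently full. */
static int toy_try_reserve(struct toy_ring *r, int n)
{
	int slot;

	if (r->reserved + n > r->size)
		return -1;
	slot = r->reserved;
	r->reserved += n;
	return slot;
}

/* Pretend the hardware completed up to n outstanding slots. */
static void toy_reclaim(struct toy_ring *r, int n)
{
	r->reserved -= (r->reserved < n) ? r->reserved : n;
}

static int toy_reserve_or_stop(struct toy_ring *r, int n)
{
	int slot = toy_try_reserve(r, n);

	if (slot >= 0)
		return slot;
	toy_reclaim(r, 32);            /* like freeing completions above */
	slot = toy_try_reserve(r, n);
	if (slot >= 0)
		return slot;
	printf("would stop the subqueue and arm the wake timer here\n");
	return -1;
}

int main(void)
{
	struct toy_ring ring = { 64, 60 };

	printf("slot = %d\n", toy_reserve_or_stop(&ring, 8));
	return 0;
}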
1463
1464/* Determine how many edesc's are needed for TSO.
1465 *
1466 * Sometimes, if "sendfile()" requires copying, we will be called with
1467 * "data" containing the header and payload, with "frags" being empty.
1468 * Sometimes, for example when using NFS over TCP, a single segment can
1469 * span 3 fragments. This requires special care.
1470 */
1471static int tso_count_edescs(struct sk_buff *skb)
1472{
1473 struct skb_shared_info *sh = skb_shinfo(skb);
1474 unsigned int sh_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
1475 unsigned int data_len = skb->len - sh_len;
1476 unsigned int p_len = sh->gso_size;
1477 long f_id = -1; /* id of the current fragment */
1478 long f_size = skb_headlen(skb) - sh_len; /* current fragment size */
1479 long f_used = 0; /* bytes used from the current fragment */
1480 long n; /* size of the current piece of payload */
1481 int num_edescs = 0;
1482 int segment;
1483
1484 for (segment = 0; segment < sh->gso_segs; segment++) {
1485
1486 unsigned int p_used = 0;
1487
1488 /* One edesc for header and for each piece of the payload. */
1489 for (num_edescs++; p_used < p_len; num_edescs++) {
1490
1491 /* Advance as needed. */
1492 while (f_used >= f_size) {
1493 f_id++;
1494 f_size = skb_frag_size(&sh->frags[f_id]);
1495 f_used = 0;
1496 }
1497
1498 /* Use bytes from the current fragment. */
1499 n = p_len - p_used;
1500 if (n > f_size - f_used)
1501 n = f_size - f_used;
1502 f_used += n;
1503 p_used += n;
1504 }
1505
1506 /* The last segment may be less than gso_size. */
1507 data_len -= p_len;
1508 if (data_len < p_len)
1509 p_len = data_len;
1510 }
1511
1512 return num_edescs;
1513}
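
/*
 * Editor's note: a stand-alone sketch of the counting walk above, using a
 * made-up layout so the arithmetic can be checked by hand.  The helper name
 * count_edescs() and the fragment sizes are invented; unlike the driver, the
 * sketch assumes every segment is exactly gso_size bytes (the "last segment
 * may be shorter" adjustment is omitted).
 */
#include <stdio.h>

/* One descriptor per segment header, plus one per contiguous piece of
 * payload a segment takes from each fragment.  frag_sizes[] holds
 * payload-only sizes; in the driver the first "fragment" is the linear
 * data that follows the header.
 */
static int count_edescs(int gso_size, int n_segs, const int *frag_sizes)
{
	int f_id = -1, f_size = 0, f_used = 0;
	int count = 0;
	int seg;

	for (seg = 0; seg < n_segs; seg++) {
		int p_used = 0;

		count++;                       /* header descriptor */
		while (p_used < gso_size) {
			int n;

			while (f_used >= f_size) {
				f_id++;
				f_size = frag_sizes[f_id];
				f_used = 0;
			}
			n = gso_size - p_used;
			if (n > f_size - f_used)
				n = f_size - f_used;
			f_used += n;
			p_used += n;
			count++;               /* payload descriptor */
		}
	}
	return count;
}

int main(void)
{
	/* 4200 payload bytes in 1500/1500/1200 fragments with gso_size 1400:
	 * segment 0 needs 1+1 edescs, segments 1 and 2 each straddle a
	 * fragment boundary and need 1+2, so this prints 8.
	 */
	int frags[] = { 1500, 1500, 1200 };

	printf("%d\n", count_edescs(1400, 3, frags));
	return 0;
}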
1514
1515/* Prepare modified copies of the skbuff headers.
1516 * FIXME: add support for IPv6.
1517 */
1518static void tso_headers_prepare(struct sk_buff *skb, unsigned char *headers,
1519 s64 slot)
1520{
1521 struct skb_shared_info *sh = skb_shinfo(skb);
1522 struct iphdr *ih;
1523 struct tcphdr *th;
1524 unsigned int sh_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
1525 unsigned int data_len = skb->len - sh_len;
1526 unsigned char *data = skb->data;
1527 unsigned int ih_off, th_off, p_len;
1528 unsigned int isum_seed, tsum_seed, id, seq;
1529 long f_id = -1; /* id of the current fragment */
1530 long f_size = skb_headlen(skb) - sh_len; /* current fragment size */
1531 long f_used = 0; /* bytes used from the current fragment */
1532 long n; /* size of the current piece of payload */
1533 int segment;
1534
1535 /* Locate original headers and compute various lengths. */
1536 ih = ip_hdr(skb);
1537 th = tcp_hdr(skb);
1538 ih_off = skb_network_offset(skb);
1539 th_off = skb_transport_offset(skb);
1540 p_len = sh->gso_size;
1541
1542 /* Set up seed values for IP and TCP csum and initialize id and seq. */
1543 isum_seed = ((0xFFFF - ih->check) +
1544 (0xFFFF - ih->tot_len) +
1545 (0xFFFF - ih->id));
1546 tsum_seed = th->check + (0xFFFF ^ htons(skb->len));
1547 id = ntohs(ih->id);
1548 seq = ntohl(th->seq);
1549
1550 /* Prepare all the headers. */
1551 for (segment = 0; segment < sh->gso_segs; segment++) {
1552 unsigned char *buf;
1553 unsigned int p_used = 0;
1554
1555 /* Copy to the header memory for this segment. */
1556 buf = headers + (slot % EQUEUE_ENTRIES) * HEADER_BYTES +
1557 NET_IP_ALIGN;
1558 memcpy(buf, data, sh_len);
1559
1560 /* Update copied ip header. */
1561 ih = (struct iphdr *)(buf + ih_off);
1562 ih->tot_len = htons(sh_len + p_len - ih_off);
1563 ih->id = htons(id);
1564 ih->check = csum_long(isum_seed + ih->tot_len +
1565 ih->id) ^ 0xffff;
1566
1567 /* Update copied tcp header. */
1568 th = (struct tcphdr *)(buf + th_off);
1569 th->seq = htonl(seq);
1570 th->check = csum_long(tsum_seed + htons(sh_len + p_len));
1571 if (segment != sh->gso_segs - 1) {
1572 th->fin = 0;
1573 th->psh = 0;
1574 }
1575
1576 /* Skip past the header. */
1577 slot++;
1578
1579 /* Skip past the payload. */
1580 while (p_used < p_len) {
1581
1582 /* Advance as needed. */
1583 while (f_used >= f_size) {
1584 f_id++;
1585 f_size = skb_frag_size(&sh->frags[f_id]);
1586 f_used = 0;
1587 }
1588
1589 /* Use bytes from the current fragment. */
1590 n = p_len - p_used;
1591 if (n > f_size - f_used)
1592 n = f_size - f_used;
1593 f_used += n;
1594 p_used += n;
1595
1596 slot++;
1597 }
1598
1599 id++;
1600 seq += p_len;
1601
1602 /* The last segment may be less than gso_size. */
1603 data_len -= p_len;
1604 if (data_len < p_len)
1605 p_len = data_len;
1606 }
1607
1608 /* Flush the headers so they are ready for hardware DMA. */
1609 wmb();
1610}
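
/*
 * Editor's note: a stand-alone sketch of the incremental checksum trick used
 * above.  The seed is the ones'-complement sum of the header minus the old
 * check, tot_len and id fields; folding the seed plus the new tot_len and id
 * and complementing the result reproduces a full recomputation.  The 10-word
 * header, csum_fold() and full_csum() below are made up for the example and
 * only mirror in spirit what csum_long() does in the driver.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Fold a wide ones'-complement accumulator down to 16 bits. */
static uint16_t csum_fold(uint64_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

/* Full checksum over 16-bit words, treating the checksum word as zero. */
static uint16_t full_csum(const uint16_t *w, int n, int csum_idx)
{
	uint64_t sum = 0;
	int i;

	for (i = 0; i < n; i++)
		if (i != csum_idx)
			sum += w[i];
	return csum_fold(sum) ^ 0xffff;
}

int main(void)
{
	/* A made-up 10-word "IPv4 header": word 1 = tot_len, 2 = id, 5 = check. */
	uint16_t w[10] = { 0x4500, 1500, 7, 0x4000, 0x4006, 0,
			   0x0a00, 0x0001, 0x0a00, 0x0002 };
	uint64_t seed;
	uint16_t incr;

	w[5] = full_csum(w, 10, 5);

	/* Seed built from the original check/tot_len/id, as in the driver. */
	seed = (0xFFFFu - w[5]) + (0xFFFFu - w[1]) + (0xFFFFu - w[2]);

	/* Rewrite length and id for the next segment, then fold and invert. */
	w[1] = 1466;
	w[2] = 8;
	incr = csum_fold(seed + w[1] + w[2]) ^ 0xffff;

	assert(incr == full_csum(w, 10, 5));
	printf("incremental=0x%04x full=0x%04x\n",
	       (unsigned)incr, (unsigned)full_csum(w, 10, 5));
	return 0;
}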
1611
1612/* Pass all the data to mpipe for egress. */
1613static void tso_egress(struct net_device *dev, gxio_mpipe_equeue_t *equeue,
1614 struct sk_buff *skb, unsigned char *headers, s64 slot)
1615{
1616 struct skb_shared_info *sh = skb_shinfo(skb);
1617 int instance = mpipe_instance(dev);
1618 struct mpipe_data *md = &mpipe_data[instance];
1619 unsigned int sh_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
1620 unsigned int data_len = skb->len - sh_len;
1621 unsigned int p_len = sh->gso_size;
1622 gxio_mpipe_edesc_t edesc_head = { { 0 } };
1623 gxio_mpipe_edesc_t edesc_body = { { 0 } };
1624 long f_id = -1; /* id of the current fragment */
1625 long f_size = skb_headlen(skb) - sh_len; /* current fragment size */
1626 long f_used = 0; /* bytes used from the current fragment */
1627 void *f_data = skb->data + sh_len;
1628 long n; /* size of the current piece of payload */
1629 unsigned long tx_packets = 0, tx_bytes = 0;
1630 unsigned int csum_start;
1631 int segment;
1632
1633 /* Prepare to egress the headers: set up header edesc. */
1634 csum_start = skb_checksum_start_offset(skb);
1635 edesc_head.csum = 1;
1636 edesc_head.csum_start = csum_start;
1637 edesc_head.csum_dest = csum_start + skb->csum_offset;
1638 edesc_head.xfer_size = sh_len;
1639
1640 /* This is only used to specify the TLB. */
1641 edesc_head.stack_idx = md->first_buffer_stack;
1642 edesc_body.stack_idx = md->first_buffer_stack;
1643
1644 /* Egress all the edescs. */
1645 for (segment = 0; segment < sh->gso_segs; segment++) {
1646 unsigned char *buf;
1647 unsigned int p_used = 0;
1648
1649 /* Egress the header. */
1650 buf = headers + (slot % EQUEUE_ENTRIES) * HEADER_BYTES +
1651 NET_IP_ALIGN;
1652 edesc_head.va = va_to_tile_io_addr(buf);
1653 gxio_mpipe_equeue_put_at(equeue, edesc_head, slot);
1654 slot++;
1655
1656 /* Egress the payload. */
1657 while (p_used < p_len) {
1658 void *va;
1659
1660 /* Advance as needed. */
1661 while (f_used >= f_size) {
1662 f_id++;
1663 f_size = skb_frag_size(&sh->frags[f_id]);
1664 f_data = tile_net_frag_buf(&sh->frags[f_id]);
1665 f_used = 0;
1666 }
1667
1668 va = f_data + f_used;
1669
1670 /* Use bytes from the current fragment. */
1671 n = p_len - p_used;
1672 if (n > f_size - f_used)
1673 n = f_size - f_used;
1674 f_used += n;
1675 p_used += n;
1676
1677 /* Egress a piece of the payload. */
1678 edesc_body.va = va_to_tile_io_addr(va);
1679 edesc_body.xfer_size = n;
1680 edesc_body.bound = !(p_used < p_len);
1681 gxio_mpipe_equeue_put_at(equeue, edesc_body, slot);
1682 slot++;
1683 }
1684
1685 tx_packets++;
1686 tx_bytes += sh_len + p_len;
1687
1688 /* The last segment may be less than gso_size. */
1689 data_len -= p_len;
1690 if (data_len < p_len)
1691 p_len = data_len;
1692 }
1693
1694 /* Update stats. */
1695 tile_net_stats_add(tx_packets, &dev->stats.tx_packets);
1696 tile_net_stats_add(tx_bytes, &dev->stats.tx_bytes);
1697}
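
/*
 * Editor's note: a stand-alone sketch printing the descriptor layout the
 * loop above would emit for the same made-up layout as the earlier counting
 * sketch (gso_size 1400, payload fragments of 1500/1500/1200 bytes): one
 * header edesc per segment, then payload edescs, with "bound" set on the
 * last piece of each segment.  The fragment sizes are invented.
 */
#include <stdio.h>

int main(void)
{
	const int frags[] = { 1500, 1500, 1200 };  /* payload bytes per frag */
	const int gso_size = 1400, n_segs = 3;
	int f_id = -1, f_size = 0, f_used = 0;
	int seg;

	for (seg = 0; seg < n_segs; seg++) {
		int p_used = 0;

		printf("seg %d: [hdr]", seg);
		while (p_used < gso_size) {
			int n;

			while (f_used >= f_size) {
				f_id++;
				f_size = frags[f_id];
				f_used = 0;
			}
			n = gso_size - p_used;
			if (n > f_size - f_used)
				n = f_size - f_used;
			f_used += n;
			p_used += n;
			printf(" [%d%s]", n,
			       p_used == gso_size ? ", bound" : "");
		}
		printf("\n");
	}
	return 0;
}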
1698
1699/* Do "TSO" handling for egress.
1700 *
1701 * Normally drivers set NETIF_F_TSO only to support hardware TSO;
1702 * otherwise the stack uses scatter-gather to implement GSO in software.
1703 * In our testing, enabling GSO support (via NETIF_F_SG) drops network
1704 * performance down to around 7.5 Gbps on the 10G interfaces, although
1705 * also dropping cpu utilization way down, to under 8%. But
1706 * implementing "TSO" in the driver brings performance back up to line
1707 * rate, while dropping cpu usage even further, to less than 4%. In
1708 * practice, profiling of GSO shows that skb_segment() is what causes
1709 * the performance overheads; we benefit in the driver from using
1710 * preallocated memory to duplicate the TCP/IP headers.
1711 */
1712static int tile_net_tx_tso(struct sk_buff *skb, struct net_device *dev)
1713{
1714 struct tile_net_info *info = &__get_cpu_var(per_cpu_info);
1715 struct tile_net_priv *priv = netdev_priv(dev);
1716 int channel = priv->echannel;
1717 int instance = priv->instance;
1718 struct mpipe_data *md = &mpipe_data[instance];
1719 struct tile_net_egress *egress = &md->egress_for_echannel[channel];
1720 struct tile_net_comps *comps =
1721 info->mpipe[instance].comps_for_echannel[channel];
1722 gxio_mpipe_equeue_t *equeue = egress->equeue;
1723 unsigned long irqflags;
1724 int num_edescs;
1725 s64 slot;
1726
1727 /* Determine how many mpipe edesc's are needed. */
1728 num_edescs = tso_count_edescs(skb);
1729
1730 local_irq_save(irqflags);
1731
1732 /* Try to acquire a completion entry and an egress slot. */
1733 slot = tile_net_equeue_try_reserve(dev, skb->queue_mapping, comps,
1734 equeue, num_edescs);
1735 if (slot < 0) {
1736 local_irq_restore(irqflags);
1737 return NETDEV_TX_BUSY;
1738 }
1739
1740 /* Set up copies of header data properly. */
1741 tso_headers_prepare(skb, egress->headers, slot);
1742
1743 /* Actually pass the data to the network hardware. */
1744 tso_egress(dev, equeue, skb, egress->headers, slot);
1745
1746 /* Add a completion record. */
1747 add_comp(equeue, comps, slot + num_edescs - 1, skb);
1748
1749 local_irq_restore(irqflags);
1750
1751 /* Make sure the egress timer is scheduled. */
1752 tile_net_schedule_egress_timer();
1753
1754 return NETDEV_TX_OK;
1755}
1756
1757/* Analyze the body and frags for a transmit request. */
1758static unsigned int tile_net_tx_frags(struct frag *frags,
1759 struct sk_buff *skb,
1760 void *b_data, unsigned int b_len)
1761{
1762 unsigned int i, n = 0;
1763
1764 struct skb_shared_info *sh = skb_shinfo(skb);
1765
1766 if (b_len != 0) {
1767 frags[n].buf = b_data;
1768 frags[n++].length = b_len;
1769 }
1770
1771 for (i = 0; i < sh->nr_frags; i++) {
1772 skb_frag_t *f = &sh->frags[i];
1773 frags[n].buf = tile_net_frag_buf(f);
1774 frags[n++].length = skb_frag_size(f);
1775 }
1776
1777 return n;
1778}
1779
1780/* Help the kernel transmit a packet. */
1781static int tile_net_tx(struct sk_buff *skb, struct net_device *dev)
1782{
1783 struct tile_net_info *info = &__get_cpu_var(per_cpu_info);
1784 struct tile_net_priv *priv = netdev_priv(dev);
1785 int instance = priv->instance;
1786 struct mpipe_data *md = &mpipe_data[instance];
1787 struct tile_net_egress *egress =
1788 &md->egress_for_echannel[priv->echannel];
1789 gxio_mpipe_equeue_t *equeue = egress->equeue;
1790 struct tile_net_comps *comps =
1791 info->mpipe[instance].comps_for_echannel[priv->echannel];
1792 unsigned int len = skb->len;
1793 unsigned char *data = skb->data;
1794 unsigned int num_edescs;
1795 struct frag frags[MAX_FRAGS];
1796 gxio_mpipe_edesc_t edescs[MAX_FRAGS];
1797 unsigned long irqflags;
1798 gxio_mpipe_edesc_t edesc = { { 0 } };
1799 unsigned int i;
1800 s64 slot;
1801
1802 if (skb_is_gso(skb))
1803 return tile_net_tx_tso(skb, dev);
1804
1805 num_edescs = tile_net_tx_frags(frags, skb, data, skb_headlen(skb));
1806
1807 /* This is only used to specify the TLB. */
1808 edesc.stack_idx = md->first_buffer_stack;
1809
1810 /* Prepare the edescs. */
1811 for (i = 0; i < num_edescs; i++) {
1812 edesc.xfer_size = frags[i].length;
1813 edesc.va = va_to_tile_io_addr(frags[i].buf);
1814 edescs[i] = edesc;
1815 }
1816
1817 /* Mark the final edesc. */
1818 edescs[num_edescs - 1].bound = 1;
1819
1820 /* Add checksum info to the initial edesc, if needed. */
1821 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1822 unsigned int csum_start = skb_checksum_start_offset(skb);
1823 edescs[0].csum = 1;
1824 edescs[0].csum_start = csum_start;
1825 edescs[0].csum_dest = csum_start + skb->csum_offset;
1826 }
1827
1828 local_irq_save(irqflags);
1829
1830 /* Try to acquire a completion entry and an egress slot. */
1831 slot = tile_net_equeue_try_reserve(dev, skb->queue_mapping, comps,
1832 equeue, num_edescs);
1833 if (slot < 0) {
1834 local_irq_restore(irqflags);
1835 return NETDEV_TX_BUSY;
1836 }
1837
1838 for (i = 0; i < num_edescs; i++)
1839 gxio_mpipe_equeue_put_at(equeue, edescs[i], slot++);
1840
1841 /* Add a completion record. */
1842 add_comp(equeue, comps, slot - 1, skb);
1843
1844 /* NOTE: Use ETH_ZLEN for short packets (e.g. 42 < 60). */
1845 tile_net_stats_add(1, &dev->stats.tx_packets);
1846 tile_net_stats_add(max_t(unsigned int, len, ETH_ZLEN),
1847 &dev->stats.tx_bytes);
1848
1849 local_irq_restore(irqflags);
1850
1851 /* Make sure the egress timer is scheduled. */
1852 tile_net_schedule_egress_timer();
1853
1854 return NETDEV_TX_OK;
1855}
1856
1857/* Return subqueue id on this core (one per core). */
1858static u16 tile_net_select_queue(struct net_device *dev, struct sk_buff *skb)
1859{
1860 return smp_processor_id();
1861}
1862
1863/* Deal with a transmit timeout. */
1864static void tile_net_tx_timeout(struct net_device *dev)
1865{
1866 int cpu;
1867
1868 for_each_online_cpu(cpu)
1869 netif_wake_subqueue(dev, cpu);
1870}
1871
1872/* Ioctl commands. */
1873static int tile_net_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
1874{
1875 return -EOPNOTSUPP;
1876}
1877
1878/* Change the MTU. */
1879static int tile_net_change_mtu(struct net_device *dev, int new_mtu)
1880{
1881 if (new_mtu < 68)
1882 return -EINVAL;
1883 if (new_mtu > ((jumbo_num != 0) ? 9000 : 1500))
1884 return -EINVAL;
1885 dev->mtu = new_mtu;
1886 return 0;
1887}
1888
1889/* Change the Ethernet address of the NIC.
1890 *
1891 * The hypervisor driver does not support changing MAC address. However,
1892 * the hardware does not do anything with the MAC address, so the address
1893 * which gets used on outgoing packets, and which is accepted on incoming
1894 * packets, is completely up to us.
1895 *
1896 * Returns 0 on success, negative on failure.
1897 */
1898static int tile_net_set_mac_address(struct net_device *dev, void *p)
1899{
1900 struct sockaddr *addr = p;
1901
1902 if (!is_valid_ether_addr(addr->sa_data))
1903 return -EINVAL;
1904 memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
1905 return 0;
1906}
1907
1908#ifdef CONFIG_NET_POLL_CONTROLLER
1909/* Polling 'interrupt' - used by things like netconsole to send skbs
1910 * without having to re-enable interrupts. It's not called while
1911 * the interrupt routine is executing.
1912 */
1913static void tile_net_netpoll(struct net_device *dev)
1914{
1915 int instance = mpipe_instance(dev);
1916 struct tile_net_info *info = &__get_cpu_var(per_cpu_info);
1917 struct mpipe_data *md = &mpipe_data[instance];
1918
1919 disable_percpu_irq(md->ingress_irq);
1920 napi_schedule(&info->mpipe[instance].napi);
1921 enable_percpu_irq(md->ingress_irq, 0);
1922}
1923#endif
1924
1925static const struct net_device_ops tile_net_ops = {
1926 .ndo_open = tile_net_open,
1927 .ndo_stop = tile_net_stop,
1928 .ndo_start_xmit = tile_net_tx,
1929 .ndo_select_queue = tile_net_select_queue,
1930 .ndo_do_ioctl = tile_net_ioctl,
1931 .ndo_change_mtu = tile_net_change_mtu,
1932 .ndo_tx_timeout = tile_net_tx_timeout,
1933 .ndo_set_mac_address = tile_net_set_mac_address,
1934#ifdef CONFIG_NET_POLL_CONTROLLER
1935 .ndo_poll_controller = tile_net_netpoll,
1936#endif
1937};
1938
1939/* The setup function.
1940 *
1941 * This uses ether_setup() to assign various fields in dev, including
1942 * setting IFF_BROADCAST and IFF_MULTICAST, then sets some extra fields.
1943 */
1944static void tile_net_setup(struct net_device *dev)
1945{
1946 netdev_features_t features = 0;
1947
1948 ether_setup(dev);
1949 dev->netdev_ops = &tile_net_ops;
1950 dev->watchdog_timeo = TILE_NET_TIMEOUT;
1951 dev->mtu = 1500;
1952
1953 features |= NETIF_F_LLTX;
1954 features |= NETIF_F_HW_CSUM;
1955 features |= NETIF_F_SG;
1956 features |= NETIF_F_TSO;
1957
1958 dev->hw_features |= features;
1959 dev->vlan_features |= features;
1960 dev->features |= features;
1961}
1962
1963/* Allocate the device structure, register the device, and obtain the
1964 * MAC address from the hypervisor.
1965 */
1966static void tile_net_dev_init(const char *name, const uint8_t *mac)
1967{
1968 int ret;
1969 int i;
1970 int nz_addr = 0;
1971 struct net_device *dev;
1972 struct tile_net_priv *priv;
1973
1974 /* HACK: Ignore "loop" links. */
1975 if (strncmp(name, "loop", 4) == 0)
1976 return;
1977
1978 /* Allocate the device structure. Normally, "name" is a
1979 * template, instantiated by register_netdev(), but not for us.
1980 */
1981 dev = alloc_netdev_mqs(sizeof(*priv), name, tile_net_setup,
1982 NR_CPUS, 1);
1983 if (!dev) {
1984 pr_err("alloc_netdev_mqs(%s) failed\n", name);
1985 return;
1986 }
1987
1988 /* Initialize "priv". */
1989 priv = netdev_priv(dev);
1990 memset(priv, 0, sizeof(*priv));
1991 priv->dev = dev;
1992 priv->channel = -1;
1993 priv->loopify_channel = -1;
1994 priv->echannel = -1;
1995
1996 /* Get the MAC address and set it in the device struct; this must
1997 * be done before the device is opened. If the MAC is all zeroes,
1998 * we use a random address, since we're probably on the simulator.
1999 */
2000 for (i = 0; i < 6; i++)
2001 nz_addr |= mac[i];
2002
2003 if (nz_addr) {
2004 memcpy(dev->dev_addr, mac, 6);
2005 dev->addr_len = 6;
2006 } else {
2007 eth_hw_addr_random(dev);
2008 }
2009
2010 /* Register the network device. */
2011 ret = register_netdev(dev);
2012 if (ret) {
2013 netdev_err(dev, "register_netdev failed %d\n", ret);
2014 free_netdev(dev);
2015 return;
2016 }
2017}
2018
2019/* Per-cpu module initialization. */
2020static void tile_net_init_module_percpu(void *unused)
2021{
2022 struct tile_net_info *info = &__get_cpu_var(per_cpu_info);
2023 int my_cpu = smp_processor_id();
2024 int instance;
2025
2026 for (instance = 0; instance < NR_MPIPE_MAX; instance++) {
2027 info->mpipe[instance].has_iqueue = false;
2028 info->mpipe[instance].instance = instance;
2029 }
2030 info->my_cpu = my_cpu;
2031
2032 /* Initialize the egress timer. */
2033 hrtimer_init(&info->egress_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2034 info->egress_timer.function = tile_net_handle_egress_timer;
2035}
2036
2037/* Module initialization. */
2038static int __init tile_net_init_module(void)
2039{
2040 int i;
2041 char name[GXIO_MPIPE_LINK_NAME_LEN];
2042 uint8_t mac[6];
2043
2044 pr_info("Tilera Network Driver\n");
2045
2046 BUILD_BUG_ON(NR_MPIPE_MAX != 2);
2047
2048 mutex_init(&tile_net_devs_for_channel_mutex);
2049
2050 /* Initialize each CPU. */
2051 on_each_cpu(tile_net_init_module_percpu, NULL, 1);
2052
2053 /* Find out what devices we have, and initialize them. */
2054 for (i = 0; gxio_mpipe_link_enumerate_mac(i, name, mac) >= 0; i++)
2055 tile_net_dev_init(name, mac);
2056
2057 if (!network_cpus_init())
2058 network_cpus_map = *cpu_online_mask;
2059
2060 return 0;
2061}
2062
2063module_init(tile_net_init_module);