Merge tag 'rxrpc-rewrite-20160615' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs
David Howells says:
====================
rxrpc: Rework endpoint record handling
Here's the next part of the AF_RXRPC rewrite. In this set I rework
endpoint record handling. There are two types of endpoint record, local
and peer. The local endpoint record is used as an anchor for the transport
socket that AF_RXRPC uses (at the moment a UDP socket). Local endpoints
can be shared between AF_RXRPC sockets under certain restricted
circumstances.
The peer endpoint is a record of the remote end. It is (or will be) used
to keep track MTU and RTT values and, with these changes, is used to find
the call(s) to abort when a network error occurs.
The following significant changes are made:
(1) The local endpoint event handling code is split out into its own file.
(2) The local endpoint list bottom half-excluding spinlock is removed as
things are arranged such that sk_user_data will not change whilst the
transport socket callbacks are in progress.
(3) Local endpoints can now only be shared if they have the same transport
address (as before) and have a local service ID of 0 (ie. they're not
listening for incoming calls). This prevents callbacks from a server
to one process being picked up by another process.
(4) Local endpoint destruction is now accomplished by the same work item
as processes events, meaning that the destructor doesn't need to wait
for the event processor.
(5) Peer endpoints are now held in a hash table rather than a flat list.
(6) Peer endpoints are now destroyed by RCU rather than by work item.
(7) Peer endpoints are now differentiated by local endpoint and remote
transport port in addition to remote transport address and transport
type and family.
This means that a firewall that excludes access between a particular
local port and remote port won't cause calls to be aborted that use a
different port pair.
(8) Error report handling now no longer assumes that the source is always
an IPv4 ICMP message from a UDP port and has assumptions that an ICMP
message comes from an IPv4 socket removed. At some point IPv6 support
will be added.
(9) Peer endpoints rather than local endpoints are now the anchor point
for distributing network error reports.
(10) Both types of endpoint records are now disposed of as soon as all
references to them are gone. There is less hanging around and once
their usage counts hit zero, records can no longer be resurrected.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/Documentation/devicetree/bindings/net/cirrus,cs89x0.txt b/Documentation/devicetree/bindings/net/cirrus,cs89x0.txt
new file mode 100644
index 0000000..c070076
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/cirrus,cs89x0.txt
@@ -0,0 +1,13 @@
+* Cirrus Logic CS8900/CS8920 Network Controller
+
+Required properties:
+- compatible : Should be "cirrus,cs8900" or "cirrus,cs8920".
+- reg : Address and length of the IO space.
+- interrupts : Should contain the controller interrupt line.
+
+Examples:
+ eth0: eth@10000000 {
+ compatible = "cirrus,cs8900";
+ reg = <0x10000000 0x400>;
+ interrupts = <10>;
+ };
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
index 245c063..4523c86 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
@@ -127,7 +127,7 @@
dev_err(&oct->pci_dev->dev, "Unknown link interface reported\n");
}
- if (linfo->link.s.status) {
+ if (linfo->link.s.link_up) {
ethtool_cmd_speed_set(ecmd, linfo->link.s.speed);
ecmd->duplex = linfo->link.s.duplex;
} else {
@@ -222,23 +222,20 @@
struct lio *lio = GET_LIO(netdev);
struct octeon_device *oct = lio->oct_dev;
struct octnic_ctrl_pkt nctrl;
- struct octnic_ctrl_params nparams;
int ret = 0;
memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
nctrl.ncmd.u64 = 0;
nctrl.ncmd.s.cmd = OCTNET_CMD_GPIO_ACCESS;
- nctrl.ncmd.s.param1 = lio->linfo.ifidx;
- nctrl.ncmd.s.param2 = addr;
- nctrl.ncmd.s.param3 = val;
+ nctrl.ncmd.s.param1 = addr;
+ nctrl.ncmd.s.param2 = val;
+ nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
nctrl.wait_time = 100;
nctrl.netpndev = (u64)netdev;
nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
- nparams.resp_order = OCTEON_RESP_ORDERED;
-
- ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl, nparams);
+ ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
if (ret < 0) {
dev_err(&oct->pci_dev->dev, "Failed to configure gpio value\n");
return -EINVAL;
@@ -303,9 +300,10 @@
mdio_cmd->mdio_addr = loc;
if (op)
mdio_cmd->value1 = *value;
- mdio_cmd->value2 = lio->linfo.ifidx;
octeon_swap_8B_data((u64 *)mdio_cmd, sizeof(struct oct_mdio_cmd) / 8);
+ sc->iq_no = lio->linfo.txpciq[0].s.q_no;
+
octeon_prepare_soft_command(oct_dev, sc, OPCODE_NIC, OPCODE_NIC_MDIO45,
0, 0, 0);
@@ -317,7 +315,7 @@
retval = octeon_send_soft_command(oct_dev, sc);
- if (retval) {
+ if (retval == IQ_SEND_FAILED) {
dev_err(&oct_dev->pci_dev->dev,
"octnet_mdio45_access instruction failed status: %x\n",
retval);
@@ -503,10 +501,10 @@
if ((msglvl ^ lio->msg_enable) & NETIF_MSG_HW) {
if (msglvl & NETIF_MSG_HW)
liquidio_set_feature(netdev,
- OCTNET_CMD_VERBOSE_ENABLE);
+ OCTNET_CMD_VERBOSE_ENABLE, 0);
else
liquidio_set_feature(netdev,
- OCTNET_CMD_VERBOSE_DISABLE);
+ OCTNET_CMD_VERBOSE_DISABLE, 0);
}
lio->msg_enable = msglvl;
@@ -653,7 +651,7 @@
intrmod_cfg->intrmod_mincnt_trigger;
}
- iq = oct->instr_queue[lio->linfo.txpciq[0]];
+ iq = oct->instr_queue[lio->linfo.txpciq[0].s.q_no];
intr_coal->tx_max_coalesced_frames = iq->fill_threshold;
break;
@@ -722,7 +720,7 @@
sc->wait_time = 1000;
retval = octeon_send_soft_command(oct_dev, sc);
- if (retval) {
+ if (retval == IQ_SEND_FAILED) {
octeon_free_soft_command(oct_dev, sc);
return -EINVAL;
}
@@ -859,7 +857,7 @@
if ((intr_coal->tx_max_coalesced_frames >= CN6XXX_DB_MIN) &&
(intr_coal->tx_max_coalesced_frames <= CN6XXX_DB_MAX)) {
for (j = 0; j < lio->linfo.num_txpciq; j++) {
- q_no = lio->linfo.txpciq[j];
+ q_no = lio->linfo.txpciq[j].s.q_no;
oct->instr_queue[q_no]->fill_threshold =
intr_coal->tx_max_coalesced_frames;
}
@@ -950,7 +948,6 @@
struct octeon_device *oct = lio->oct_dev;
struct oct_link_info *linfo;
struct octnic_ctrl_pkt nctrl;
- struct octnic_ctrl_params nparams;
int ret = 0;
/* get the link info */
@@ -978,9 +975,9 @@
nctrl.ncmd.u64 = 0;
nctrl.ncmd.s.cmd = OCTNET_CMD_SET_SETTINGS;
+ nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
nctrl.wait_time = 1000;
nctrl.netpndev = (u64)netdev;
- nctrl.ncmd.s.param1 = lio->linfo.ifidx;
nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
/* Passing the parameters sent by ethtool like Speed, Autoneg & Duplex
@@ -990,19 +987,17 @@
/* Autoneg ON */
nctrl.ncmd.s.more = OCTNIC_NCMD_PHY_ON |
OCTNIC_NCMD_AUTONEG_ON;
- nctrl.ncmd.s.param2 = ecmd->advertising;
+ nctrl.ncmd.s.param1 = ecmd->advertising;
} else {
/* Autoneg OFF */
nctrl.ncmd.s.more = OCTNIC_NCMD_PHY_ON;
- nctrl.ncmd.s.param3 = ecmd->duplex;
+ nctrl.ncmd.s.param2 = ecmd->duplex;
- nctrl.ncmd.s.param2 = ecmd->speed;
+ nctrl.ncmd.s.param1 = ecmd->speed;
}
- nparams.resp_order = OCTEON_RESP_ORDERED;
-
- ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl, nparams);
+ ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
if (ret < 0) {
dev_err(&oct->pci_dev->dev, "Failed to set settings\n");
return -1;
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 655d89e..d0ab97c 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -84,6 +84,8 @@
module_param(conf_type, int, 0);
MODULE_PARM_DESC(conf_type, "select octeon configuration 0 default 1 ovs");
+static int ptp_enable = 1;
+
/* Bit mask values for lio->ifstate */
#define LIO_IFSTATE_DROQ_OPS 0x01
#define LIO_IFSTATE_REGISTERED 0x02
@@ -166,6 +168,8 @@
* received from the IP layer.
*/
struct octeon_sg_entry *sg;
+
+ u64 sg_dma_ptr;
};
/** This structure is used by NIC driver to store information required
@@ -682,7 +686,8 @@
int i;
for (i = 0; i < netdev->num_tx_queues; i++)
- netif_wake_subqueue(netdev, i);
+ if (__netif_subqueue_stopped(netdev, i))
+ netif_wake_subqueue(netdev, i);
} else {
netif_wake_queue(netdev);
}
@@ -705,7 +710,7 @@
{
struct lio *lio = GET_LIO(netdev);
- if (lio->linfo.link.s.status) {
+ if (lio->linfo.link.s.link_up) {
txqs_start(netdev);
return;
}
@@ -752,11 +757,14 @@
/* check each sub-queue state */
for (q = 0; q < numqs; q++) {
- iq = lio->linfo.txpciq[q & (lio->linfo.num_txpciq - 1)];
+ iq = lio->linfo.txpciq[q %
+ (lio->linfo.num_txpciq)].s.q_no;
if (octnet_iq_is_full(lio->oct_dev, iq))
continue;
- wake_q(lio->netdev, q);
- ret_val++;
+ if (__netif_subqueue_stopped(lio->netdev, q)) {
+ wake_q(lio->netdev, q);
+ ret_val++;
+ }
}
} else {
if (octnet_iq_is_full(lio->oct_dev, lio->txq))
@@ -787,64 +795,116 @@
}
/**
- * \brief Delete gather list
+ * \brief Delete gather lists
* @param lio per-network private data
*/
-static void delete_glist(struct lio *lio)
+static void delete_glists(struct lio *lio)
{
struct octnic_gather *g;
+ int i;
- do {
- g = (struct octnic_gather *)
- list_delete_head(&lio->glist);
- if (g) {
- if (g->sg)
- kfree((void *)((unsigned long)g->sg -
- g->adjust));
- kfree(g);
- }
- } while (g);
+ if (!lio->glist)
+ return;
+
+ for (i = 0; i < lio->linfo.num_txpciq; i++) {
+ do {
+ g = (struct octnic_gather *)
+ list_delete_head(&lio->glist[i]);
+ if (g) {
+ if (g->sg) {
+ dma_unmap_single(&lio->oct_dev->
+ pci_dev->dev,
+ g->sg_dma_ptr,
+ g->sg_size,
+ DMA_TO_DEVICE);
+ kfree((void *)((unsigned long)g->sg -
+ g->adjust));
+ }
+ kfree(g);
+ }
+ } while (g);
+ }
+
+ kfree((void *)lio->glist);
}
/**
- * \brief Setup gather list
+ * \brief Setup gather lists
* @param lio per-network private data
*/
-static int setup_glist(struct lio *lio)
+static int setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs)
{
- int i;
+ int i, j;
struct octnic_gather *g;
- INIT_LIST_HEAD(&lio->glist);
+ lio->glist_lock = kcalloc(num_iqs, sizeof(*lio->glist_lock),
+ GFP_KERNEL);
+ if (!lio->glist_lock)
+ return 1;
- for (i = 0; i < lio->tx_qsize; i++) {
- g = kzalloc(sizeof(*g), GFP_KERNEL);
- if (!g)
- break;
-
- g->sg_size =
- ((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * OCT_SG_ENTRY_SIZE);
-
- g->sg = kmalloc(g->sg_size + 8, GFP_KERNEL);
- if (!g->sg) {
- kfree(g);
- break;
- }
-
- /* The gather component should be aligned on 64-bit boundary */
- if (((unsigned long)g->sg) & 7) {
- g->adjust = 8 - (((unsigned long)g->sg) & 7);
- g->sg = (struct octeon_sg_entry *)
- ((unsigned long)g->sg + g->adjust);
- }
- list_add_tail(&g->list, &lio->glist);
+ lio->glist = kcalloc(num_iqs, sizeof(*lio->glist),
+ GFP_KERNEL);
+ if (!lio->glist) {
+ kfree((void *)lio->glist_lock);
+ return 1;
}
- if (i == lio->tx_qsize)
- return 0;
+ for (i = 0; i < num_iqs; i++) {
+ int numa_node = cpu_to_node(i % num_online_cpus());
- delete_glist(lio);
- return 1;
+ spin_lock_init(&lio->glist_lock[i]);
+
+ INIT_LIST_HEAD(&lio->glist[i]);
+
+ for (j = 0; j < lio->tx_qsize; j++) {
+ g = kzalloc_node(sizeof(*g), GFP_KERNEL,
+ numa_node);
+ if (!g)
+ g = kzalloc(sizeof(*g), GFP_KERNEL);
+ if (!g)
+ break;
+
+ g->sg_size = ((ROUNDUP4(OCTNIC_MAX_SG) >> 2) *
+ OCT_SG_ENTRY_SIZE);
+
+ g->sg = kmalloc_node(g->sg_size + 8,
+ GFP_KERNEL, numa_node);
+ if (!g->sg)
+ g->sg = kmalloc(g->sg_size + 8, GFP_KERNEL);
+ if (!g->sg) {
+ kfree(g);
+ break;
+ }
+
+ /* The gather component should be aligned on 64-bit
+ * boundary
+ */
+ if (((unsigned long)g->sg) & 7) {
+ g->adjust = 8 - (((unsigned long)g->sg) & 7);
+ g->sg = (struct octeon_sg_entry *)
+ ((unsigned long)g->sg + g->adjust);
+ }
+ g->sg_dma_ptr = dma_map_single(&oct->pci_dev->dev,
+ g->sg, g->sg_size,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&oct->pci_dev->dev,
+ g->sg_dma_ptr)) {
+ kfree((void *)((unsigned long)g->sg -
+ g->adjust));
+ kfree(g);
+ break;
+ }
+
+ list_add_tail(&g->list, &lio->glist[i]);
+ }
+
+ if (j != lio->tx_qsize) {
+ delete_glists(lio);
+ return 1;
+ }
+ }
+
+ return 0;
}
/**
@@ -858,7 +918,7 @@
if (atomic_read(&lio->ifstate) & LIO_IFSTATE_REGISTERED) {
struct oct_link_info *linfo = &lio->linfo;
- if (linfo->link.s.status) {
+ if (linfo->link.s.link_up) {
netif_info(lio, link, lio->netdev, "%d Mbps %s Duplex UP\n",
linfo->link.s.speed,
(linfo->link.s.duplex) ? "Full" : "Half");
@@ -880,13 +940,15 @@
union oct_link_status *ls)
{
struct lio *lio = GET_LIO(netdev);
+ int changed = (lio->linfo.link.u64 != ls->u64);
- if ((lio->intf_open) && (lio->linfo.link.u64 != ls->u64)) {
- lio->linfo.link.u64 = ls->u64;
+ lio->linfo.link.u64 = ls->u64;
+ if ((lio->intf_open) && (changed)) {
print_link_info(netdev);
+ lio->link_changes++;
- if (lio->linfo.link.s.status) {
+ if (lio->linfo.link.s.link_up) {
netif_carrier_on(netdev);
/* start_txq(netdev); */
txqs_wake(netdev);
@@ -1159,18 +1221,15 @@
static void send_rx_ctrl_cmd(struct lio *lio, int start_stop)
{
struct octnic_ctrl_pkt nctrl;
- struct octnic_ctrl_params nparams;
memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
nctrl.ncmd.s.cmd = OCTNET_CMD_RX_CTL;
- nctrl.ncmd.s.param1 = lio->linfo.ifidx;
- nctrl.ncmd.s.param2 = start_stop;
+ nctrl.ncmd.s.param1 = start_stop;
+ nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
nctrl.netpndev = (u64)lio->netdev;
- nparams.resp_order = OCTEON_RESP_NORESPONSE;
-
- if (octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl, nparams) < 0)
+ if (octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl) < 0)
netif_info(lio, rx_err, lio->netdev, "Failed to send RX Control message\n");
}
@@ -1205,10 +1264,12 @@
if (atomic_read(&lio->ifstate) & LIO_IFSTATE_REGISTERED)
unregister_netdev(netdev);
- delete_glist(lio);
+ delete_glists(lio);
free_netdev(netdev);
+ oct->props[ifidx].gmxport = -1;
+
oct->props[ifidx].netdev = NULL;
}
@@ -1230,7 +1291,8 @@
for (i = 0; i < oct->ifcount; i++) {
lio = GET_LIO(oct->props[i].netdev);
for (j = 0; j < lio->linfo.num_rxpciq; j++)
- octeon_unregister_droq_ops(oct, lio->linfo.rxpciq[j]);
+ octeon_unregister_droq_ops(oct,
+ lio->linfo.rxpciq[j].s.q_no);
}
for (i = 0; i < oct->ifcount; i++)
@@ -1326,6 +1388,16 @@
return 0;
}
+static inline int skb_iq(struct lio *lio, struct sk_buff *skb)
+{
+ int q = 0;
+
+ if (netif_is_multiqueue(lio->netdev))
+ q = skb->queue_mapping % lio->linfo.num_txpciq;
+
+ return q;
+}
+
/**
* \brief Check Tx queue state for a given network buffer
* @param lio per-network private data
@@ -1337,14 +1409,17 @@
if (netif_is_multiqueue(lio->netdev)) {
q = skb->queue_mapping;
- iq = lio->linfo.txpciq[(q & (lio->linfo.num_txpciq - 1))];
+ iq = lio->linfo.txpciq[(q % (lio->linfo.num_txpciq))].s.q_no;
} else {
iq = lio->txq;
+ q = iq;
}
if (octnet_iq_is_full(lio->oct_dev, iq))
return 0;
- wake_q(lio->netdev, q);
+
+ if (__netif_subqueue_stopped(lio->netdev, q))
+ wake_q(lio->netdev, q);
return 1;
}
@@ -1367,7 +1442,7 @@
check_txq_state(lio, skb);
- recv_buffer_free((struct sk_buff *)skb);
+ tx_buffer_free(skb);
}
/**
@@ -1380,7 +1455,7 @@
struct sk_buff *skb;
struct lio *lio;
struct octnic_gather *g;
- int i, frags;
+ int i, frags, iq;
finfo = (struct octnet_buf_free_info *)buf;
skb = finfo->skb;
@@ -1402,17 +1477,17 @@
i++;
}
- dma_unmap_single(&lio->oct_dev->pci_dev->dev,
- finfo->dptr, g->sg_size,
- DMA_TO_DEVICE);
+ dma_sync_single_for_cpu(&lio->oct_dev->pci_dev->dev,
+ g->sg_dma_ptr, g->sg_size, DMA_TO_DEVICE);
- spin_lock(&lio->lock);
- list_add_tail(&g->list, &lio->glist);
- spin_unlock(&lio->lock);
+ iq = skb_iq(lio, skb);
+ spin_lock(&lio->glist_lock[iq]);
+ list_add_tail(&g->list, &lio->glist[iq]);
+ spin_unlock(&lio->glist_lock[iq]);
check_txq_state(lio, skb); /* mq support: sub-queue state check */
- recv_buffer_free((struct sk_buff *)skb);
+ tx_buffer_free(skb);
}
/**
@@ -1426,7 +1501,7 @@
struct sk_buff *skb;
struct lio *lio;
struct octnic_gather *g;
- int i, frags;
+ int i, frags, iq;
sc = (struct octeon_soft_command *)buf;
skb = (struct sk_buff *)sc->callback_arg;
@@ -1450,13 +1525,14 @@
i++;
}
- dma_unmap_single(&lio->oct_dev->pci_dev->dev,
- finfo->dptr, g->sg_size,
- DMA_TO_DEVICE);
+ dma_sync_single_for_cpu(&lio->oct_dev->pci_dev->dev,
+ g->sg_dma_ptr, g->sg_size, DMA_TO_DEVICE);
- spin_lock(&lio->lock);
- list_add_tail(&g->list, &lio->glist);
- spin_unlock(&lio->lock);
+ iq = skb_iq(lio, skb);
+
+ spin_lock(&lio->glist_lock[iq]);
+ list_add_tail(&g->list, &lio->glist[iq]);
+ spin_unlock(&lio->glist_lock[iq]);
/* Don't free the skb yet */
@@ -1743,14 +1819,13 @@
static u16 select_q(struct net_device *dev, struct sk_buff *skb,
void *accel_priv, select_queue_fallback_t fallback)
{
- int qindex;
+ u32 qindex = 0;
struct lio *lio;
lio = GET_LIO(dev);
- /* select queue on chosen queue_mapping or core */
- qindex = skb_rx_queue_recorded(skb) ?
- skb_get_rx_queue(skb) : smp_processor_id();
- return (u16)(qindex & (lio->linfo.num_txpciq - 1));
+ qindex = skb_tx_hash(dev, skb);
+
+ return (u16)(qindex % (lio->linfo.num_txpciq));
}
/** Routine to push packets arriving on Octeon interface upto network layer.
@@ -1759,26 +1834,27 @@
* @param len - size of total data received.
* @param rh - Control header associated with the packet
* @param param - additional control data with the packet
+ * @param arg - farg registered in droq_ops
*/
static void
liquidio_push_packet(u32 octeon_id,
void *skbuff,
u32 len,
union octeon_rh *rh,
- void *param)
+ void *param,
+ void *arg)
{
struct napi_struct *napi = param;
- struct octeon_device *oct = lio_get_device(octeon_id);
struct sk_buff *skb = (struct sk_buff *)skbuff;
struct skb_shared_hwtstamps *shhwtstamps;
u64 ns;
- struct net_device *netdev =
- (struct net_device *)oct->props[rh->r_dh.link].netdev;
+ struct net_device *netdev = (struct net_device *)arg;
struct octeon_droq *droq = container_of(param, struct octeon_droq,
napi);
if (netdev) {
int packet_was_received;
struct lio *lio = GET_LIO(netdev);
+ struct octeon_device *oct = lio->oct_dev;
/* Do not proceed if the interface is not in RUNNING state. */
if (!ifstate_check(lio, LIO_IFSTATE_RUNNING)) {
@@ -1789,21 +1865,54 @@
skb->dev = netdev;
- if (rh->r_dh.has_hwtstamp) {
- /* timestamp is included from the hardware at the
- * beginning of the packet.
- */
- if (ifstate_check(lio,
- LIO_IFSTATE_RX_TIMESTAMP_ENABLED)) {
- /* Nanoseconds are in the first 64-bits
- * of the packet.
- */
- memcpy(&ns, (skb->data), sizeof(ns));
- shhwtstamps = skb_hwtstamps(skb);
- shhwtstamps->hwtstamp =
- ns_to_ktime(ns + lio->ptp_adjust);
+ skb_record_rx_queue(skb, droq->q_no);
+ if (likely(len > MIN_SKB_SIZE)) {
+ struct octeon_skb_page_info *pg_info;
+ unsigned char *va;
+
+ pg_info = ((struct octeon_skb_page_info *)(skb->cb));
+ if (pg_info->page) {
+ /* For Paged allocation use the frags */
+ va = page_address(pg_info->page) +
+ pg_info->page_offset;
+ memcpy(skb->data, va, MIN_SKB_SIZE);
+ skb_put(skb, MIN_SKB_SIZE);
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+ pg_info->page,
+ pg_info->page_offset +
+ MIN_SKB_SIZE,
+ len - MIN_SKB_SIZE,
+ LIO_RXBUFFER_SZ);
}
- skb_pull(skb, sizeof(ns));
+ } else {
+ struct octeon_skb_page_info *pg_info =
+ ((struct octeon_skb_page_info *)(skb->cb));
+ skb_copy_to_linear_data(skb, page_address(pg_info->page)
+ + pg_info->page_offset, len);
+ skb_put(skb, len);
+ put_page(pg_info->page);
+ }
+
+ if (((oct->chip_id == OCTEON_CN66XX) ||
+ (oct->chip_id == OCTEON_CN68XX)) &&
+ ptp_enable) {
+ if (rh->r_dh.has_hwtstamp) {
+ /* timestamp is included from the hardware at
+ * the beginning of the packet.
+ */
+ if (ifstate_check
+ (lio, LIO_IFSTATE_RX_TIMESTAMP_ENABLED)) {
+ /* Nanoseconds are in the first 64-bits
+ * of the packet.
+ */
+ memcpy(&ns, (skb->data), sizeof(ns));
+ shhwtstamps = skb_hwtstamps(skb);
+ shhwtstamps->hwtstamp =
+ ns_to_ktime(ns +
+ lio->ptp_adjust);
+ }
+ skb_pull(skb, sizeof(ns));
+ }
}
skb->protocol = eth_type_trans(skb, skb->dev);
@@ -1935,10 +2044,10 @@
* are for ingress packets.
*/
static inline int setup_io_queues(struct octeon_device *octeon_dev,
- struct net_device *net_device)
+ int ifidx)
{
- static int first_time = 1;
- static struct octeon_droq_ops droq_ops;
+ struct octeon_droq_ops droq_ops;
+ struct net_device *netdev;
static int cpu_id;
static int cpu_id_modulus;
struct octeon_droq *droq;
@@ -1947,23 +2056,26 @@
struct lio *lio;
int num_tx_descs;
- lio = GET_LIO(net_device);
- if (first_time) {
- first_time = 0;
- memset(&droq_ops, 0, sizeof(struct octeon_droq_ops));
+ netdev = octeon_dev->props[ifidx].netdev;
- droq_ops.fptr = liquidio_push_packet;
+ lio = GET_LIO(netdev);
- droq_ops.poll_mode = 1;
- droq_ops.napi_fn = liquidio_napi_drv_callback;
- cpu_id = 0;
- cpu_id_modulus = num_present_cpus();
- }
+ memset(&droq_ops, 0, sizeof(struct octeon_droq_ops));
+
+ droq_ops.fptr = liquidio_push_packet;
+ droq_ops.farg = (void *)netdev;
+
+ droq_ops.poll_mode = 1;
+ droq_ops.napi_fn = liquidio_napi_drv_callback;
+ cpu_id = 0;
+ cpu_id_modulus = num_present_cpus();
/* set up DROQs. */
for (q = 0; q < lio->linfo.num_rxpciq; q++) {
- q_no = lio->linfo.rxpciq[q];
-
+ q_no = lio->linfo.rxpciq[q].s.q_no;
+ dev_dbg(&octeon_dev->pci_dev->dev,
+ "setup_io_queues index:%d linfo.rxpciq.s.q_no:%d\n",
+ q, q_no);
retval = octeon_setup_droq(octeon_dev, q_no,
CFG_GET_NUM_RX_DESCS_NIC_IF
(octeon_get_conf(octeon_dev),
@@ -1980,7 +2092,11 @@
droq = octeon_dev->droq[q_no];
napi = &droq->napi;
- netif_napi_add(net_device, napi, liquidio_napi_poll, 64);
+ dev_dbg(&octeon_dev->pci_dev->dev,
+ "netif_napi_add netdev:%llx oct:%llx\n",
+ (u64)netdev,
+ (u64)octeon_dev);
+ netif_napi_add(netdev, napi, liquidio_napi_poll, 64);
/* designate a CPU for this droq */
droq->cpu_id = cpu_id;
@@ -1996,9 +2112,9 @@
num_tx_descs = CFG_GET_NUM_TX_DESCS_NIC_IF(octeon_get_conf
(octeon_dev),
lio->ifidx);
- retval = octeon_setup_iq(octeon_dev, lio->linfo.txpciq[q],
- num_tx_descs,
- netdev_get_tx_queue(net_device, q));
+ retval = octeon_setup_iq(octeon_dev, ifidx, q,
+ lio->linfo.txpciq[q], num_tx_descs,
+ netdev_get_tx_queue(netdev, q));
if (retval) {
dev_err(&octeon_dev->pci_dev->dev,
" %s : Runtime IQ(TxQ) creation failed.\n",
@@ -2096,7 +2212,8 @@
netif_info(lio, ifdown, lio->netdev, "Stopping interface!\n");
/* Inform that netif carrier is down */
lio->intf_open = 0;
- lio->linfo.link.s.status = 0;
+ lio->linfo.link.s.link_up = 0;
+ lio->link_changes++;
netif_carrier_off(netdev);
@@ -2235,7 +2352,6 @@
struct lio *lio = GET_LIO(netdev);
struct octeon_device *oct = lio->oct_dev;
struct octnic_ctrl_pkt nctrl;
- struct octnic_ctrl_params nparams;
struct netdev_hw_addr *ha;
u64 *mc;
int ret, i;
@@ -2246,10 +2362,10 @@
/* Create a ctrl pkt command to be sent to core app. */
nctrl.ncmd.u64 = 0;
nctrl.ncmd.s.cmd = OCTNET_CMD_SET_MULTI_LIST;
- nctrl.ncmd.s.param1 = lio->linfo.ifidx;
- nctrl.ncmd.s.param2 = get_new_flags(netdev);
- nctrl.ncmd.s.param3 = mc_count;
+ nctrl.ncmd.s.param1 = get_new_flags(netdev);
+ nctrl.ncmd.s.param2 = mc_count;
nctrl.ncmd.s.more = mc_count;
+ nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
nctrl.netpndev = (u64)netdev;
nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
@@ -2270,9 +2386,7 @@
*/
nctrl.wait_time = 0;
- nparams.resp_order = OCTEON_RESP_NORESPONSE;
-
- ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl, nparams);
+ ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
if (ret < 0) {
dev_err(&oct->pci_dev->dev, "DEVFLAGS change failed in core (ret: 0x%x)\n",
ret);
@@ -2290,19 +2404,17 @@
struct octeon_device *oct = lio->oct_dev;
struct sockaddr *addr = (struct sockaddr *)p;
struct octnic_ctrl_pkt nctrl;
- struct octnic_ctrl_params nparams;
- if ((!is_valid_ether_addr(addr->sa_data)) ||
- (ifstate_check(lio, LIO_IFSTATE_RUNNING)))
+ if (!is_valid_ether_addr(addr->sa_data))
return -EADDRNOTAVAIL;
memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
nctrl.ncmd.u64 = 0;
nctrl.ncmd.s.cmd = OCTNET_CMD_CHANGE_MACADDR;
- nctrl.ncmd.s.param1 = lio->linfo.ifidx;
- nctrl.ncmd.s.param2 = 0;
+ nctrl.ncmd.s.param1 = 0;
nctrl.ncmd.s.more = 1;
+ nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
nctrl.netpndev = (u64)netdev;
nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
nctrl.wait_time = 100;
@@ -2311,9 +2423,7 @@
/* The MAC Address is presented in network byte order. */
memcpy((u8 *)&nctrl.udd[0] + 2, addr->sa_data, ETH_ALEN);
- nparams.resp_order = OCTEON_RESP_ORDERED;
-
- ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl, nparams);
+ ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
if (ret < 0) {
dev_err(&oct->pci_dev->dev, "MAC Address change failed\n");
return -ENOMEM;
@@ -2341,7 +2451,7 @@
oct = lio->oct_dev;
for (i = 0; i < lio->linfo.num_txpciq; i++) {
- iq_no = lio->linfo.txpciq[i];
+ iq_no = lio->linfo.txpciq[i].s.q_no;
iq_stats = &oct->instr_queue[iq_no]->stats;
pkts += iq_stats->tx_done;
drop += iq_stats->tx_dropped;
@@ -2357,7 +2467,7 @@
bytes = 0;
for (i = 0; i < lio->linfo.num_rxpciq; i++) {
- oq_no = lio->linfo.rxpciq[i];
+ oq_no = lio->linfo.rxpciq[i].s.q_no;
oq_stats = &oct->droq[oq_no]->stats;
pkts += oq_stats->rx_pkts_received;
drop += (oq_stats->rx_dropped +
@@ -2383,7 +2493,6 @@
struct lio *lio = GET_LIO(netdev);
struct octeon_device *oct = lio->oct_dev;
struct octnic_ctrl_pkt nctrl;
- struct octnic_ctrl_params nparams;
int max_frm_size = new_mtu + OCTNET_FRM_HEADER_SIZE;
int ret = 0;
@@ -2403,15 +2512,13 @@
nctrl.ncmd.u64 = 0;
nctrl.ncmd.s.cmd = OCTNET_CMD_CHANGE_MTU;
- nctrl.ncmd.s.param1 = lio->linfo.ifidx;
- nctrl.ncmd.s.param2 = new_mtu;
+ nctrl.ncmd.s.param1 = new_mtu;
+ nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
nctrl.wait_time = 100;
nctrl.netpndev = (u64)netdev;
nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
- nparams.resp_order = OCTEON_RESP_ORDERED;
-
- ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl, nparams);
+ ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
if (ret < 0) {
dev_err(&oct->pci_dev->dev, "Failed to set MTU\n");
return -1;
@@ -2536,7 +2643,7 @@
}
octeon_free_soft_command(oct, sc);
- recv_buffer_free(skb);
+ tx_buffer_free(skb);
}
/* \brief Send a data packet that will be timestamped
@@ -2551,10 +2658,9 @@
{
int retval;
struct octeon_soft_command *sc;
- struct octeon_instr_ih *ih;
- struct octeon_instr_rdp *rdp;
struct lio *lio;
int ring_doorbell;
+ u32 len;
lio = finfo->lio;
@@ -2576,14 +2682,13 @@
sc->callback_arg = finfo->skb;
sc->iq_no = ndata->q_no;
- ih = (struct octeon_instr_ih *)&sc->cmd.ih;
- rdp = (struct octeon_instr_rdp *)&sc->cmd.rdp;
+ len = (u32)((struct octeon_instr_ih2 *)(&sc->cmd.cmd2.ih2))->dlengsz;
ring_doorbell = !xmit_more;
retval = octeon_send_command(oct, sc->iq_no, ring_doorbell, &sc->cmd,
- sc, ih->dlengsz, ndata->reqtype);
+ sc, len, ndata->reqtype);
- if (retval) {
+ if (retval == IQ_SEND_FAILED) {
dev_err(&oct->pci_dev->dev, "timestamp data packet failed status: %x\n",
retval);
octeon_free_soft_command(oct, sc);
@@ -2594,68 +2699,6 @@
return retval;
}
-static inline int is_ipv4(struct sk_buff *skb)
-{
- return (skb->protocol == htons(ETH_P_IP)) &&
- (ip_hdr(skb)->version == 4);
-}
-
-static inline int is_vlan(struct sk_buff *skb)
-{
- return skb->protocol == htons(ETH_P_8021Q);
-}
-
-static inline int is_ip_fragmented(struct sk_buff *skb)
-{
- /* The Don't fragment and Reserved flag fields are ignored.
- * IP is fragmented if
- * - the More fragments bit is set (indicating this IP is a fragment
- * with more to follow; the current offset could be 0 ).
- * - ths offset field is non-zero.
- */
- return (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) ? 1 : 0;
-}
-
-static inline int is_ipv6(struct sk_buff *skb)
-{
- return (skb->protocol == htons(ETH_P_IPV6)) &&
- (ipv6_hdr(skb)->version == 6);
-}
-
-static inline int is_with_extn_hdr(struct sk_buff *skb)
-{
- return (ipv6_hdr(skb)->nexthdr != IPPROTO_TCP) &&
- (ipv6_hdr(skb)->nexthdr != IPPROTO_UDP);
-}
-
-static inline int is_tcpudp(struct sk_buff *skb)
-{
- return (ip_hdr(skb)->protocol == IPPROTO_TCP) ||
- (ip_hdr(skb)->protocol == IPPROTO_UDP);
-}
-
-static inline u32 get_ipv4_5tuple_tag(struct sk_buff *skb)
-{
- u32 tag;
- struct iphdr *iphdr = ip_hdr(skb);
-
- tag = crc32(0, &iphdr->protocol, 1);
- tag = crc32(tag, (u8 *)&iphdr->saddr, 8);
- tag = crc32(tag, skb_transport_header(skb), 4);
- return tag;
-}
-
-static inline u32 get_ipv6_5tuple_tag(struct sk_buff *skb)
-{
- u32 tag;
- struct ipv6hdr *ipv6hdr = ipv6_hdr(skb);
-
- tag = crc32(0, &ipv6hdr->nexthdr, 1);
- tag = crc32(tag, (u8 *)&ipv6hdr->saddr, 32);
- tag = crc32(tag, skb_transport_header(skb), 4);
- return tag;
-}
-
/** \brief Transmit networks packets to the Octeon interface
* @param skbuff skbuff struct to be passed to network layer.
* @param netdev pointer to network device
@@ -2670,18 +2713,22 @@
struct octnic_data_pkt ndata;
struct octeon_device *oct;
struct oct_iq_stats *stats;
- int cpu = 0, status = 0;
+ struct octeon_instr_irh *irh;
+ union tx_info *tx_info;
+ int status = 0;
int q_idx = 0, iq_no = 0;
- int xmit_more;
+ int xmit_more, j;
+ u64 dptr = 0;
u32 tag = 0;
lio = GET_LIO(netdev);
oct = lio->oct_dev;
if (netif_is_multiqueue(netdev)) {
- cpu = skb->queue_mapping;
- q_idx = (cpu & (lio->linfo.num_txpciq - 1));
- iq_no = lio->linfo.txpciq[q_idx];
+ q_idx = skb->queue_mapping;
+ q_idx = (q_idx % (lio->linfo.num_txpciq));
+ tag = q_idx;
+ iq_no = lio->linfo.txpciq[q_idx].s.q_no;
} else {
iq_no = lio->txq;
}
@@ -2692,11 +2739,11 @@
* transmitted.
*/
if (!(atomic_read(&lio->ifstate) & LIO_IFSTATE_RUNNING) ||
- (!lio->linfo.link.s.status) ||
+ (!lio->linfo.link.s.link_up) ||
(skb->len <= 0)) {
netif_info(lio, tx_err, lio->netdev,
"Transmit failed link_status : %d\n",
- lio->linfo.link.s.status);
+ lio->linfo.link.s.link_up);
goto lio_xmit_failed;
}
@@ -2739,53 +2786,11 @@
ndata.datasize = skb->len;
cmdsetup.u64 = 0;
- cmdsetup.s.ifidx = lio->linfo.ifidx;
+ cmdsetup.s.iq_no = iq_no;
- if (skb->ip_summed == CHECKSUM_PARTIAL) {
- if (is_ipv4(skb) && !is_ip_fragmented(skb) && is_tcpudp(skb)) {
- tag = get_ipv4_5tuple_tag(skb);
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ cmdsetup.s.transport_csum = 1;
- cmdsetup.s.cksum_offset = sizeof(struct ethhdr) + 1;
-
- if (ip_hdr(skb)->ihl > 5)
- cmdsetup.s.ipv4opts_ipv6exthdr =
- OCT_PKT_PARAM_IPV4OPTS;
-
- } else if (is_ipv6(skb)) {
- tag = get_ipv6_5tuple_tag(skb);
-
- cmdsetup.s.cksum_offset = sizeof(struct ethhdr) + 1;
-
- if (is_with_extn_hdr(skb))
- cmdsetup.s.ipv4opts_ipv6exthdr =
- OCT_PKT_PARAM_IPV6EXTHDR;
-
- } else if (is_vlan(skb)) {
- if (vlan_eth_hdr(skb)->h_vlan_encapsulated_proto
- == htons(ETH_P_IP) &&
- !is_ip_fragmented(skb) && is_tcpudp(skb)) {
- tag = get_ipv4_5tuple_tag(skb);
-
- cmdsetup.s.cksum_offset =
- sizeof(struct vlan_ethhdr) + 1;
-
- if (ip_hdr(skb)->ihl > 5)
- cmdsetup.s.ipv4opts_ipv6exthdr =
- OCT_PKT_PARAM_IPV4OPTS;
-
- } else if (vlan_eth_hdr(skb)->h_vlan_encapsulated_proto
- == htons(ETH_P_IPV6)) {
- tag = get_ipv6_5tuple_tag(skb);
-
- cmdsetup.s.cksum_offset =
- sizeof(struct vlan_ethhdr) + 1;
-
- if (is_with_extn_hdr(skb))
- cmdsetup.s.ipv4opts_ipv6exthdr =
- OCT_PKT_PARAM_IPV6EXTHDR;
- }
- }
- }
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
cmdsetup.s.timestamp = 1;
@@ -2793,20 +2798,20 @@
if (skb_shinfo(skb)->nr_frags == 0) {
cmdsetup.s.u.datasize = skb->len;
- octnet_prepare_pci_cmd(&ndata.cmd, &cmdsetup, tag);
+ octnet_prepare_pci_cmd(oct, &ndata.cmd, &cmdsetup, tag);
/* Offload checksum calculation for TCP/UDP packets */
- ndata.cmd.dptr = dma_map_single(&oct->pci_dev->dev,
- skb->data,
- skb->len,
- DMA_TO_DEVICE);
- if (dma_mapping_error(&oct->pci_dev->dev, ndata.cmd.dptr)) {
+ dptr = dma_map_single(&oct->pci_dev->dev,
+ skb->data,
+ skb->len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&oct->pci_dev->dev, dptr)) {
dev_err(&oct->pci_dev->dev, "%s DMA mapping error 1\n",
__func__);
return NETDEV_TX_BUSY;
}
- finfo->dptr = ndata.cmd.dptr;
-
+ ndata.cmd.cmd2.dptr = dptr;
+ finfo->dptr = dptr;
ndata.reqtype = REQTYPE_NORESP_NET;
} else {
@@ -2814,9 +2819,10 @@
struct skb_frag_struct *frag;
struct octnic_gather *g;
- spin_lock(&lio->lock);
- g = (struct octnic_gather *)list_delete_head(&lio->glist);
- spin_unlock(&lio->lock);
+ spin_lock(&lio->glist_lock[q_idx]);
+ g = (struct octnic_gather *)
+ list_delete_head(&lio->glist[q_idx]);
+ spin_unlock(&lio->glist_lock[q_idx]);
if (!g) {
netif_info(lio, tx_err, lio->netdev,
@@ -2826,7 +2832,7 @@
cmdsetup.s.gather = 1;
cmdsetup.s.u.gatherptrs = (skb_shinfo(skb)->nr_frags + 1);
- octnet_prepare_pci_cmd(&ndata.cmd, &cmdsetup, tag);
+ octnet_prepare_pci_cmd(oct, &ndata.cmd, &cmdsetup, tag);
memset(g->sg, 0, g->sg_size);
@@ -2853,34 +2859,43 @@
frag->size,
DMA_TO_DEVICE);
+ if (dma_mapping_error(&oct->pci_dev->dev,
+ g->sg[i >> 2].ptr[i & 3])) {
+ dma_unmap_single(&oct->pci_dev->dev,
+ g->sg[0].ptr[0],
+ skb->len - skb->data_len,
+ DMA_TO_DEVICE);
+ for (j = 1; j < i; j++) {
+ frag = &skb_shinfo(skb)->frags[j - 1];
+ dma_unmap_page(&oct->pci_dev->dev,
+ g->sg[j >> 2].ptr[j & 3],
+ frag->size,
+ DMA_TO_DEVICE);
+ }
+ dev_err(&oct->pci_dev->dev, "%s DMA mapping error 3\n",
+ __func__);
+ return NETDEV_TX_BUSY;
+ }
+
add_sg_size(&g->sg[(i >> 2)], frag->size, (i & 3));
i++;
}
- ndata.cmd.dptr = dma_map_single(&oct->pci_dev->dev,
- g->sg, g->sg_size,
- DMA_TO_DEVICE);
- if (dma_mapping_error(&oct->pci_dev->dev, ndata.cmd.dptr)) {
- dev_err(&oct->pci_dev->dev, "%s DMA mapping error 3\n",
- __func__);
- dma_unmap_single(&oct->pci_dev->dev, g->sg[0].ptr[0],
- skb->len - skb->data_len,
- DMA_TO_DEVICE);
- return NETDEV_TX_BUSY;
- }
+ dma_sync_single_for_device(&oct->pci_dev->dev, g->sg_dma_ptr,
+ g->sg_size, DMA_TO_DEVICE);
+ dptr = g->sg_dma_ptr;
- finfo->dptr = ndata.cmd.dptr;
+ ndata.cmd.cmd2.dptr = dptr;
+ finfo->dptr = dptr;
finfo->g = g;
ndata.reqtype = REQTYPE_NORESP_NET_SG;
}
- if (skb_shinfo(skb)->gso_size) {
- struct octeon_instr_irh *irh =
- (struct octeon_instr_irh *)&ndata.cmd.irh;
- union tx_info *tx_info = (union tx_info *)&ndata.cmd.ossp[0];
+ irh = (struct octeon_instr_irh *)&ndata.cmd.cmd2.irh;
+ tx_info = (union tx_info *)&ndata.cmd.cmd2.ossp[0];
- irh->len = 1; /* to indicate that ossp[0] contains tx_info */
+ if (skb_shinfo(skb)->gso_size) {
tx_info->s.gso_size = skb_shinfo(skb)->gso_size;
tx_info->s.gso_segs = skb_shinfo(skb)->gso_segs;
}
@@ -2910,9 +2925,10 @@
stats->tx_dropped++;
netif_info(lio, tx_err, lio->netdev, "IQ%d Transmit dropped:%llu\n",
iq_no, stats->tx_dropped);
- dma_unmap_single(&oct->pci_dev->dev, ndata.cmd.dptr,
- ndata.datasize, DMA_TO_DEVICE);
- recv_buffer_free(skb);
+ if (dptr)
+ dma_unmap_single(&oct->pci_dev->dev, dptr,
+ ndata.datasize, DMA_TO_DEVICE);
+ tx_buffer_free(skb);
return NETDEV_TX_OK;
}
@@ -2932,27 +2948,24 @@
txqs_wake(netdev);
}
-int liquidio_set_feature(struct net_device *netdev, int cmd)
+int liquidio_set_feature(struct net_device *netdev, int cmd, u16 param1)
{
struct lio *lio = GET_LIO(netdev);
struct octeon_device *oct = lio->oct_dev;
struct octnic_ctrl_pkt nctrl;
- struct octnic_ctrl_params nparams;
int ret = 0;
memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
nctrl.ncmd.u64 = 0;
nctrl.ncmd.s.cmd = cmd;
- nctrl.ncmd.s.param1 = lio->linfo.ifidx;
- nctrl.ncmd.s.param2 = OCTNIC_LROIPV4 | OCTNIC_LROIPV6;
+ nctrl.ncmd.s.param1 = param1;
+ nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
nctrl.wait_time = 100;
nctrl.netpndev = (u64)netdev;
nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
- nparams.resp_order = OCTEON_RESP_NORESPONSE;
-
- ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl, nparams);
+ ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
if (ret < 0) {
dev_err(&oct->pci_dev->dev, "Feature change failed in core (ret: 0x%x)\n",
ret);
@@ -3008,10 +3021,12 @@
return 0;
if ((features & NETIF_F_LRO) && (lio->dev_capability & NETIF_F_LRO))
- liquidio_set_feature(netdev, OCTNET_CMD_LRO_ENABLE);
+ liquidio_set_feature(netdev, OCTNET_CMD_LRO_ENABLE,
+ OCTNIC_LROIPV4 | OCTNIC_LROIPV6);
else if (!(features & NETIF_F_LRO) &&
(lio->dev_capability & NETIF_F_LRO))
- liquidio_set_feature(netdev, OCTNET_CMD_LRO_DISABLE);
+ liquidio_set_feature(netdev, OCTNET_CMD_LRO_DISABLE,
+ OCTNIC_LROIPV4 | OCTNIC_LROIPV6);
return 0;
}
@@ -3082,24 +3097,27 @@
{
struct octeon_device *oct = (struct octeon_device *)buf;
struct octeon_recv_pkt *recv_pkt = recv_info->recv_pkt;
- int ifidx = 0;
+ int gmxport = 0;
union oct_link_status *ls;
int i;
- if ((recv_pkt->buffer_size[0] != sizeof(*ls)) ||
- (recv_pkt->rh.r_nic_info.ifidx > oct->ifcount)) {
+ if (recv_pkt->buffer_size[0] != sizeof(*ls)) {
dev_err(&oct->pci_dev->dev, "Malformed NIC_INFO, len=%d, ifidx=%d\n",
recv_pkt->buffer_size[0],
- recv_pkt->rh.r_nic_info.ifidx);
+ recv_pkt->rh.r_nic_info.gmxport);
goto nic_info_err;
}
- ifidx = recv_pkt->rh.r_nic_info.ifidx;
+ gmxport = recv_pkt->rh.r_nic_info.gmxport;
ls = (union oct_link_status *)get_rbd(recv_pkt->buffer_ptr[0]);
octeon_swap_8B_data((u64 *)ls, (sizeof(union oct_link_status)) >> 3);
-
- update_link_status(oct->props[ifidx].netdev, ls);
+ for (i = 0; i < oct->ifcount; i++) {
+ if (oct->props[i].gmxport == gmxport) {
+ update_link_status(oct->props[i].netdev, ls);
+ break;
+ }
+ }
nic_info_err:
for (i = 0; i < recv_pkt->buffer_count; i++)
@@ -3125,13 +3143,13 @@
struct liquidio_if_cfg_context *ctx;
struct liquidio_if_cfg_resp *resp;
struct octdev_props *props;
- int retval, num_iqueues, num_oqueues, q_no;
- u64 q_mask;
+ int retval, num_iqueues, num_oqueues;
int num_cpus = num_online_cpus();
union oct_nic_if_cfg if_cfg;
unsigned int base_queue;
unsigned int gmx_port_id;
u32 resp_size, ctx_size;
+ u32 ifidx_or_pfnum;
/* This is to handle link status changes */
octeon_register_dispatch_fn(octeon_dev, OPCODE_NIC,
@@ -3167,13 +3185,14 @@
CFG_GET_BASE_QUE_NIC_IF(octeon_get_conf(octeon_dev), i);
gmx_port_id =
CFG_GET_GMXID_NIC_IF(octeon_get_conf(octeon_dev), i);
+ ifidx_or_pfnum = i;
if (num_iqueues > num_cpus)
num_iqueues = num_cpus;
if (num_oqueues > num_cpus)
num_oqueues = num_cpus;
dev_dbg(&octeon_dev->pci_dev->dev,
"requesting config for interface %d, iqs %d, oqs %d\n",
- i, num_iqueues, num_oqueues);
+ ifidx_or_pfnum, num_iqueues, num_oqueues);
ACCESS_ONCE(ctx->cond) = 0;
ctx->octeon_id = lio_get_device_id(octeon_dev);
init_waitqueue_head(&ctx->wc);
@@ -3183,8 +3202,11 @@
if_cfg.s.num_oqueues = num_oqueues;
if_cfg.s.base_queue = base_queue;
if_cfg.s.gmx_port_id = gmx_port_id;
+
+ sc->iq_no = 0;
+
octeon_prepare_soft_command(octeon_dev, sc, OPCODE_NIC,
- OPCODE_NIC_IF_CFG, i,
+ OPCODE_NIC_IF_CFG, 0,
if_cfg.u64, 0);
sc->callback = if_cfg_callback;
@@ -3192,7 +3214,7 @@
sc->wait_time = 1000;
retval = octeon_send_soft_command(octeon_dev, sc);
- if (retval) {
+ if (retval == IQ_SEND_FAILED) {
dev_err(&octeon_dev->pci_dev->dev,
"iq/oq config failed status: %x\n",
retval);
@@ -3234,8 +3256,7 @@
goto setup_nic_dev_fail;
}
- props = &octeon_dev->props[i];
- props->netdev = netdev;
+ SET_NETDEV_DEV(netdev, &octeon_dev->pci_dev->dev);
if (num_iqueues > 1)
lionetdevops.ndo_select_queue = select_q;
@@ -3249,23 +3270,21 @@
memset(lio, 0, sizeof(struct lio));
- lio->linfo.ifidx = resp->cfg_info.ifidx;
- lio->ifidx = resp->cfg_info.ifidx;
+ lio->ifidx = ifidx_or_pfnum;
+
+ props = &octeon_dev->props[i];
+ props->gmxport = resp->cfg_info.linfo.gmxport;
+ props->netdev = netdev;
lio->linfo.num_rxpciq = num_oqueues;
lio->linfo.num_txpciq = num_iqueues;
- q_mask = resp->cfg_info.oqmask;
- /* q_mask is 0-based and already verified mask is nonzero */
for (j = 0; j < num_oqueues; j++) {
- q_no = __ffs64(q_mask);
- q_mask &= (~(1UL << q_no));
- lio->linfo.rxpciq[j] = q_no;
+ lio->linfo.rxpciq[j].u64 =
+ resp->cfg_info.linfo.rxpciq[j].u64;
}
- q_mask = resp->cfg_info.iqmask;
for (j = 0; j < num_iqueues; j++) {
- q_no = __ffs64(q_mask);
- q_mask &= (~(1UL << q_no));
- lio->linfo.txpciq[j] = q_no;
+ lio->linfo.txpciq[j].u64 =
+ resp->cfg_info.linfo.txpciq[j].u64;
}
lio->linfo.hw_addr = resp->cfg_info.linfo.hw_addr;
lio->linfo.gmxport = resp->cfg_info.linfo.gmxport;
@@ -3274,13 +3293,15 @@
lio->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
lio->dev_capability = NETIF_F_HIGHDMA
- | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM
- | NETIF_F_SG | NETIF_F_RXCSUM
- | NETIF_F_TSO | NETIF_F_TSO6
- | NETIF_F_LRO;
+ | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM
+ | NETIF_F_SG | NETIF_F_RXCSUM
+ | NETIF_F_GRO
+ | NETIF_F_TSO | NETIF_F_TSO6
+ | NETIF_F_LRO;
netif_set_gso_max_size(netdev, OCTNIC_GSO_MAX_SIZE);
- netdev->features = lio->dev_capability;
+ netdev->features = (lio->dev_capability & ~NETIF_F_LRO);
+
netdev->vlan_features = lio->dev_capability;
netdev->hw_features = lio->dev_capability;
@@ -3291,7 +3312,6 @@
lio->oct_dev = octeon_dev;
lio->octprops = props;
lio->netdev = netdev;
- spin_lock_init(&lio->lock);
dev_dbg(&octeon_dev->pci_dev->dev,
"if%d gmx: %d hw_addr: 0x%llx\n", i,
@@ -3306,23 +3326,22 @@
ether_addr_copy(netdev->dev_addr, mac);
- if (setup_io_queues(octeon_dev, netdev)) {
+ /* By default all interfaces on a single Octeon uses the same
+ * tx and rx queues
+ */
+ lio->txq = lio->linfo.txpciq[0].s.q_no;
+ lio->rxq = lio->linfo.rxpciq[0].s.q_no;
+ if (setup_io_queues(octeon_dev, i)) {
dev_err(&octeon_dev->pci_dev->dev, "I/O queues creation failed\n");
goto setup_nic_dev_fail;
}
ifstate_set(lio, LIO_IFSTATE_DROQ_OPS);
- /* By default all interfaces on a single Octeon uses the same
- * tx and rx queues
- */
- lio->txq = lio->linfo.txpciq[0];
- lio->rxq = lio->linfo.rxpciq[0];
-
lio->tx_qsize = octeon_get_tx_qsize(octeon_dev, lio->txq);
lio->rx_qsize = octeon_get_rx_qsize(octeon_dev, lio->rxq);
- if (setup_glist(lio)) {
+ if (setup_glists(octeon_dev, lio, num_iqueues)) {
dev_err(&octeon_dev->pci_dev->dev,
"Gather list allocation failed\n");
goto setup_nic_dev_fail;
@@ -3331,10 +3350,13 @@
/* Register ethtool support */
liquidio_set_ethtool_ops(netdev);
- liquidio_set_feature(netdev, OCTNET_CMD_LRO_ENABLE);
+ if (netdev->features & NETIF_F_LRO)
+ liquidio_set_feature(netdev, OCTNET_CMD_LRO_ENABLE,
+ OCTNIC_LROIPV4 | OCTNIC_LROIPV6);
if ((debug != -1) && (debug & NETIF_MSG_HW))
- liquidio_set_feature(netdev, OCTNET_CMD_VERBOSE_ENABLE);
+ liquidio_set_feature(netdev, OCTNET_CMD_VERBOSE_ENABLE,
+ 0);
/* Register the network device with the OS */
if (register_netdev(netdev)) {
@@ -3346,13 +3368,7 @@
"Setup NIC ifidx:%d mac:%02x%02x%02x%02x%02x%02x\n",
i, mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
netif_carrier_off(netdev);
-
- if (lio->linfo.link.s.status) {
- netif_carrier_on(netdev);
- start_txq(netdev);
- } else {
- netif_carrier_off(netdev);
- }
+ lio->link_changes++;
ifstate_set(lio, LIO_IFSTATE_REGISTERED);
@@ -3386,7 +3402,7 @@
static int liquidio_init_nic_module(struct octeon_device *oct)
{
struct oct_intrmod_cfg *intrmod_cfg;
- int retval = 0;
+ int i, retval = 0;
int num_nic_ports = CFG_GET_NUM_NIC_PORTS(octeon_get_conf(oct));
dev_dbg(&oct->pci_dev->dev, "Initializing network interfaces\n");
@@ -3400,6 +3416,9 @@
memset(oct->props, 0,
sizeof(struct octdev_props) * num_nic_ports);
+ for (i = 0; i < MAX_OCTEON_LINKS; i++)
+ oct->props[i].gmxport = -1;
+
retval = setup_nic_devices(oct);
if (retval) {
dev_err(&oct->pci_dev->dev, "Setup NIC devices failed\n");
diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
index 0ac347c..2179691 100644
--- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
+++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
@@ -174,9 +174,11 @@
/*------------------------- End Scatter/Gather ---------------------------*/
#define OCTNET_FRM_PTP_HEADER_SIZE 8
-#define OCTNET_FRM_HEADER_SIZE 30 /* PTP timestamp + VLAN + Ethernet */
-#define OCTNET_MIN_FRM_SIZE (64 + OCTNET_FRM_PTP_HEADER_SIZE)
+#define OCTNET_FRM_HEADER_SIZE 22 /* VLAN + Ethernet */
+
+#define OCTNET_MIN_FRM_SIZE 64
+
#define OCTNET_MAX_FRM_SIZE (16000 + OCTNET_FRM_HEADER_SIZE)
#define OCTNET_DEFAULT_FRM_SIZE (1500 + OCTNET_FRM_HEADER_SIZE)
@@ -258,19 +260,19 @@
u64 more:6; /* How many udd words follow the command */
- u64 param1:29;
+ u64 reserved:29;
- u64 param2:16;
+ u64 param1:16;
- u64 param3:8;
+ u64 param2:8;
#else
- u64 param3:8;
+ u64 param2:8;
- u64 param2:16;
+ u64 param1:16;
- u64 param1:29;
+ u64 reserved:29;
u64 more:6;
@@ -283,8 +285,140 @@
#define OCTNET_CMD_SIZE (sizeof(union octnet_cmd))
+/* Instruction Header (DPI - CN23xx) - for OCTEON-III models */
+struct octeon_instr_ih3 {
+#ifdef __BIG_ENDIAN_BITFIELD
+
+ /** Reserved3 */
+ u64 reserved3:1;
+
+ /** Gather indicator 1=gather*/
+ u64 gather:1;
+
+ /** Data length OR no. of entries in gather list */
+ u64 dlengsz:14;
+
+ /** Front Data size */
+ u64 fsz:6;
+
+ /** Reserved2 */
+ u64 reserved2:4;
+
+ /** PKI port kind - PKIND */
+ u64 pkind:6;
+
+ /** Reserved1 */
+ u64 reserved1:32;
+
+#else
+ /** Reserved1 */
+ u64 reserved1:32;
+
+ /** PKI port kind - PKIND */
+ u64 pkind:6;
+
+ /** Reserved2 */
+ u64 reserved2:4;
+
+ /** Front Data size */
+ u64 fsz:6;
+
+ /** Data length OR no. of entries in gather list */
+ u64 dlengsz:14;
+
+ /** Gather indicator 1=gather*/
+ u64 gather:1;
+
+ /** Reserved3 */
+ u64 reserved3:1;
+
+#endif
+};
+
+/* Optional PKI Instruction Header(PKI IH) - for OCTEON CN23XX models */
+/** BIG ENDIAN format. */
+struct octeon_instr_pki_ih3 {
+#ifdef __BIG_ENDIAN_BITFIELD
+
+ /** Wider bit */
+ u64 w:1;
+
+ /** Raw mode indicator 1 = RAW */
+ u64 raw:1;
+
+ /** Use Tag */
+ u64 utag:1;
+
+ /** Use QPG */
+ u64 uqpg:1;
+
+ /** Reserved2 */
+ u64 reserved2:1;
+
+ /** Parse Mode */
+ u64 pm:3;
+
+ /** Skip Length */
+ u64 sl:8;
+
+ /** Use Tag Type */
+ u64 utt:1;
+
+ /** Tag type */
+ u64 tagtype:2;
+
+ /** Reserved1 */
+ u64 reserved1:2;
+
+ /** QPG Value */
+ u64 qpg:11;
+
+ /** Tag Value */
+ u64 tag:32;
+
+#else
+
+ /** Tag Value */
+ u64 tag:32;
+
+ /** QPG Value */
+ u64 qpg:11;
+
+ /** Reserved1 */
+ u64 reserved1:2;
+
+ /** Tag type */
+ u64 tagtype:2;
+
+ /** Use Tag Type */
+ u64 utt:1;
+
+ /** Skip Length */
+ u64 sl:8;
+
+ /** Parse Mode */
+ u64 pm:3;
+
+ /** Reserved2 */
+ u64 reserved2:1;
+
+ /** Use QPG */
+ u64 uqpg:1;
+
+ /** Use Tag */
+ u64 utag:1;
+
+ /** Raw mode indicator 1 = RAW */
+ u64 raw:1;
+
+ /** Wider bit */
+ u64 w:1;
+#endif
+
+};
+
/** Instruction Header */
-struct octeon_instr_ih {
+struct octeon_instr_ih2 {
#ifdef __BIG_ENDIAN_BITFIELD
/** Raw mode indicator 1 = RAW */
u64 raw:1;
@@ -412,10 +546,9 @@
u64 opcode:4;
u64 subcode:8;
u64 len:3; /** additional 64-bit words */
- u64 rid:13;
- u64 reserved:4;
+ u64 reserved:8;
u64 extra:25;
- u64 ifidx:7;
+ u64 gmxport:16;
} r_nic_info;
#else
u64 u64;
@@ -448,10 +581,9 @@
u64 opcode:4;
} r_core_drv_init;
struct {
- u64 ifidx:7;
+ u64 gmxport:16;
u64 extra:25;
- u64 reserved:4;
- u64 rid:13;
+ u64 reserved:8;
u64 len:3; /** additional 64-bit words */
u64 subcode:8;
u64 opcode:4;
@@ -461,30 +593,25 @@
#define OCT_RH_SIZE (sizeof(union octeon_rh))
-#define OCT_PKT_PARAM_IPV4OPTS 1
-#define OCT_PKT_PARAM_IPV6EXTHDR 2
-
union octnic_packet_params {
u32 u32;
struct {
#ifdef __BIG_ENDIAN_BITFIELD
- u32 reserved:6;
+ u32 reserved:24;
+ u32 ip_csum:1; /* Perform IP header checksum(s) */
+ /* Perform Outer transport header checksum */
+ u32 transport_csum:1;
+ /* Find tunnel, and perform transport csum. */
u32 tnl_csum:1;
- u32 ip_csum:1;
- u32 ipv4opts_ipv6exthdr:2;
- u32 ipsec_ops:4;
- u32 tsflag:1;
- u32 csoffset:9;
- u32 ifidx:8;
+ u32 tsflag:1; /* Timestamp this packet */
+ u32 ipsec_ops:4; /* IPsec operation */
#else
- u32 ifidx:8;
- u32 csoffset:9;
- u32 tsflag:1;
u32 ipsec_ops:4;
- u32 ipv4opts_ipv6exthdr:2;
- u32 ip_csum:1;
+ u32 tsflag:1;
u32 tnl_csum:1;
- u32 reserved:6;
+ u32 transport_csum:1;
+ u32 ip_csum:1;
+ u32 reserved:24;
#endif
} s;
};
@@ -496,53 +623,90 @@
struct {
#ifdef __BIG_ENDIAN_BITFIELD
u64 duplex:8;
- u64 status:8;
u64 mtu:16;
u64 speed:16;
+ u64 link_up:1;
u64 autoneg:1;
u64 interface:4;
u64 pause:1;
- u64 reserved:10;
+ u64 reserved:17;
#else
- u64 reserved:10;
+ u64 reserved:17;
u64 pause:1;
u64 interface:4;
u64 autoneg:1;
+ u64 link_up:1;
u64 speed:16;
u64 mtu:16;
- u64 status:8;
u64 duplex:8;
#endif
} s;
};
+/** The txpciq info passed to host from the firmware */
+
+union oct_txpciq {
+ u64 u64;
+
+ struct {
+#ifdef __BIG_ENDIAN_BITFIELD
+ u64 q_no:8;
+ u64 port:8;
+ u64 pkind:6;
+ u64 use_qpg:1;
+ u64 qpg:11;
+ u64 reserved:30;
+#else
+ u64 reserved:30;
+ u64 qpg:11;
+ u64 use_qpg:1;
+ u64 pkind:6;
+ u64 port:8;
+ u64 q_no:8;
+#endif
+ } s;
+};
+
+/** The rxpciq info passed to host from the firmware */
+
+union oct_rxpciq {
+ u64 u64;
+
+ struct {
+#ifdef __BIG_ENDIAN_BITFIELD
+ u64 q_no:8;
+ u64 reserved:56;
+#else
+ u64 reserved:56;
+ u64 q_no:8;
+#endif
+ } s;
+};
+
/** Information for a OCTEON ethernet interface shared between core & host. */
struct oct_link_info {
union oct_link_status link;
u64 hw_addr;
#ifdef __BIG_ENDIAN_BITFIELD
- u16 gmxport;
- u8 rsvd[3];
- u8 num_txpciq;
- u8 num_rxpciq;
- u8 ifidx;
+ u64 gmxport:16;
+ u64 rsvd:32;
+ u64 num_txpciq:8;
+ u64 num_rxpciq:8;
#else
- u8 ifidx;
- u8 num_rxpciq;
- u8 num_txpciq;
- u8 rsvd[3];
- u16 gmxport;
+ u64 num_rxpciq:8;
+ u64 num_txpciq:8;
+ u64 rsvd:32;
+ u64 gmxport:16;
#endif
- u8 txpciq[MAX_IOQS_PER_NICIF];
- u8 rxpciq[MAX_IOQS_PER_NICIF];
+ union oct_txpciq txpciq[MAX_IOQS_PER_NICIF];
+ union oct_rxpciq rxpciq[MAX_IOQS_PER_NICIF];
};
#define OCT_LINK_INFO_SIZE (sizeof(struct oct_link_info))
struct liquidio_if_cfg_info {
- u64 ifidx;
u64 iqmask; /** mask for IQs enabled for the port */
u64 oqmask; /** mask for OQs enabled for the port */
struct oct_link_info linfo; /** initial link information */
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
index 8e23e3f..3290009 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
@@ -741,49 +741,59 @@
return oct;
}
+/* this function is only for setting up the first queue */
int octeon_setup_instr_queues(struct octeon_device *oct)
{
- u32 i, num_iqs = 0;
+ u32 num_iqs = 0;
u32 num_descs = 0;
+ u32 iq_no = 0;
+ union oct_txpciq txpciq;
+ int numa_node = cpu_to_node(iq_no % num_online_cpus());
+ num_iqs = 1;
/* this causes queue 0 to be default queue */
- if (OCTEON_CN6XXX(oct)) {
- num_iqs = 1;
+ if (OCTEON_CN6XXX(oct))
num_descs =
CFG_GET_NUM_DEF_TX_DESCS(CHIP_FIELD(oct, cn6xxx, conf));
- }
oct->num_iqs = 0;
- for (i = 0; i < num_iqs; i++) {
- oct->instr_queue[i] =
+ oct->instr_queue[0] = vmalloc_node(sizeof(*oct->instr_queue[0]),
+ numa_node);
+ if (!oct->instr_queue[0])
+ oct->instr_queue[0] =
vmalloc(sizeof(struct octeon_instr_queue));
- if (!oct->instr_queue[i])
- return 1;
-
- memset(oct->instr_queue[i], 0,
- sizeof(struct octeon_instr_queue));
-
- oct->instr_queue[i]->app_ctx = (void *)(size_t)i;
- if (octeon_init_instr_queue(oct, i, num_descs))
- return 1;
-
- oct->num_iqs++;
+ if (!oct->instr_queue[0])
+ return 1;
+ memset(oct->instr_queue[0], 0, sizeof(struct octeon_instr_queue));
+ oct->instr_queue[0]->q_index = 0;
+ oct->instr_queue[0]->app_ctx = (void *)(size_t)0;
+ oct->instr_queue[0]->ifidx = 0;
+ txpciq.u64 = 0;
+ txpciq.s.q_no = iq_no;
+ txpciq.s.use_qpg = 0;
+ txpciq.s.qpg = 0;
+ if (octeon_init_instr_queue(oct, txpciq, num_descs)) {
+ /* prevent memory leak */
+ vfree(oct->instr_queue[0]);
+ return 1;
}
+ oct->num_iqs++;
return 0;
}
int octeon_setup_output_queues(struct octeon_device *oct)
{
- u32 i, num_oqs = 0;
+ u32 num_oqs = 0;
u32 num_descs = 0;
u32 desc_size = 0;
+ u32 oq_no = 0;
+ int numa_node = cpu_to_node(oq_no % num_online_cpus());
+ num_oqs = 1;
/* this causes queue 0 to be default queue */
if (OCTEON_CN6XXX(oct)) {
- /* CFG_GET_OQ_MAX_BASE_Q(CHIP_FIELD(oct, cn6xxx, conf)); */
- num_oqs = 1;
num_descs =
CFG_GET_NUM_DEF_RX_DESCS(CHIP_FIELD(oct, cn6xxx, conf));
desc_size =
@@ -791,19 +801,15 @@
}
oct->num_oqs = 0;
+ oct->droq[0] = vmalloc_node(sizeof(*oct->droq[0]), numa_node);
+ if (!oct->droq[0])
+ oct->droq[0] = vmalloc(sizeof(*oct->droq[0]));
+ if (!oct->droq[0])
+ return 1;
- for (i = 0; i < num_oqs; i++) {
- oct->droq[i] = vmalloc(sizeof(*oct->droq[i]));
- if (!oct->droq[i])
- return 1;
-
- memset(oct->droq[i], 0, sizeof(struct octeon_droq));
-
- if (octeon_init_droq(oct, i, num_descs, desc_size, NULL))
- return 1;
-
- oct->num_oqs++;
- }
+ if (octeon_init_droq(oct, oq_no, num_descs, desc_size, NULL))
+ return 1;
+ oct->num_oqs++;
return 0;
}
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
index 36e1f85..0950b94 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
@@ -267,6 +267,7 @@
/* Each interface in the Octeon device has a network
* device pointer (used for OS specific calls).
*/
+ int gmxport;
struct net_device *netdev;
};
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
index 174072b..59a5293 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
@@ -151,22 +151,26 @@
struct octeon_droq *droq)
{
u32 i;
+ struct octeon_skb_page_info *pg_info;
for (i = 0; i < droq->max_count; i++) {
- if (droq->recv_buf_list[i].buffer) {
- if (droq->desc_ring) {
- lio_unmap_ring_info(oct->pci_dev,
- (u64)droq->
- desc_ring[i].info_ptr,
- OCT_DROQ_INFO_SIZE);
- lio_unmap_ring(oct->pci_dev,
- (u64)droq->desc_ring[i].
- buffer_ptr,
- droq->buffer_size);
- }
- recv_buffer_free(droq->recv_buf_list[i].buffer);
- droq->recv_buf_list[i].buffer = NULL;
- }
+ pg_info = &droq->recv_buf_list[i].pg_info;
+
+ if (pg_info->dma)
+ lio_unmap_ring(oct->pci_dev,
+ (u64)pg_info->dma);
+ pg_info->dma = 0;
+
+ if (pg_info->page)
+ recv_buffer_destroy(droq->recv_buf_list[i].buffer,
+ pg_info);
+
+ if (droq->desc_ring && droq->desc_ring[i].info_ptr)
+ lio_unmap_ring_info(oct->pci_dev,
+ (u64)droq->
+ desc_ring[i].info_ptr,
+ OCT_DROQ_INFO_SIZE);
+ droq->recv_buf_list[i].buffer = NULL;
}
octeon_droq_reset_indices(droq);
@@ -181,11 +185,12 @@
struct octeon_droq_desc *desc_ring = droq->desc_ring;
for (i = 0; i < droq->max_count; i++) {
- buf = recv_buffer_alloc(oct, droq->q_no, droq->buffer_size);
+ buf = recv_buffer_alloc(oct, &droq->recv_buf_list[i].pg_info);
if (!buf) {
dev_err(&oct->pci_dev->dev, "%s buffer alloc failed\n",
__func__);
+ droq->stats.rx_alloc_failure++;
return -ENOMEM;
}
@@ -197,9 +202,7 @@
/* map ring buffers into memory */
desc_ring[i].info_ptr = lio_map_ring_info(droq, i);
desc_ring[i].buffer_ptr =
- lio_map_ring(oct->pci_dev,
- droq->recv_buf_list[i].buffer,
- droq->buffer_size);
+ lio_map_ring(droq->recv_buf_list[i].buffer);
}
octeon_droq_reset_indices(droq);
@@ -242,6 +245,8 @@
struct octeon_droq *droq;
u32 desc_ring_size = 0, c_num_descs = 0, c_buf_size = 0;
u32 c_pkts_per_intr = 0, c_refill_threshold = 0;
+ int orig_node = dev_to_node(&oct->pci_dev->dev);
+ int numa_node = cpu_to_node(q_no % num_online_cpus());
dev_dbg(&oct->pci_dev->dev, "%s[%d]\n", __func__, q_no);
@@ -261,15 +266,23 @@
struct octeon_config *conf6x = CHIP_FIELD(oct, cn6xxx, conf);
c_pkts_per_intr = (u32)CFG_GET_OQ_PKTS_PER_INTR(conf6x);
- c_refill_threshold = (u32)CFG_GET_OQ_REFILL_THRESHOLD(conf6x);
+ c_refill_threshold =
+ (u32)CFG_GET_OQ_REFILL_THRESHOLD(conf6x);
+ } else {
+ return 1;
}
droq->max_count = c_num_descs;
droq->buffer_size = c_buf_size;
desc_ring_size = droq->max_count * OCT_DROQ_DESC_SIZE;
+ set_dev_node(&oct->pci_dev->dev, numa_node);
droq->desc_ring = lio_dma_alloc(oct, desc_ring_size,
(dma_addr_t *)&droq->desc_ring_dma);
+ set_dev_node(&oct->pci_dev->dev, orig_node);
+ if (!droq->desc_ring)
+ droq->desc_ring = lio_dma_alloc(oct, desc_ring_size,
+ (dma_addr_t *)&droq->desc_ring_dma);
if (!droq->desc_ring) {
dev_err(&oct->pci_dev->dev,
@@ -283,12 +296,11 @@
droq->max_count);
droq->info_list =
- cnnic_alloc_aligned_dma(oct->pci_dev,
- (droq->max_count * OCT_DROQ_INFO_SIZE),
- &droq->info_alloc_size,
- &droq->info_base_addr,
- &droq->info_list_dma);
-
+ cnnic_numa_alloc_aligned_dma((droq->max_count *
+ OCT_DROQ_INFO_SIZE),
+ &droq->info_alloc_size,
+ &droq->info_base_addr,
+ numa_node);
if (!droq->info_list) {
dev_err(&oct->pci_dev->dev, "Cannot allocate memory for info list.\n");
lio_dma_free(oct, (droq->max_count * OCT_DROQ_DESC_SIZE),
@@ -297,7 +309,12 @@
}
droq->recv_buf_list = (struct octeon_recv_buffer *)
- vmalloc(droq->max_count *
+ vmalloc_node(droq->max_count *
+ OCT_DROQ_RECVBUF_SIZE,
+ numa_node);
+ if (!droq->recv_buf_list)
+ droq->recv_buf_list = (struct octeon_recv_buffer *)
+ vmalloc(droq->max_count *
OCT_DROQ_RECVBUF_SIZE);
if (!droq->recv_buf_list) {
dev_err(&oct->pci_dev->dev, "Output queue recv buf list alloc failed\n");
@@ -358,6 +375,7 @@
struct octeon_recv_pkt *recv_pkt;
struct octeon_recv_info *recv_info;
u32 i, bytes_left;
+ struct octeon_skb_page_info *pg_info;
info = &droq->info_list[idx];
@@ -375,9 +393,14 @@
bytes_left = (u32)info->length;
while (buf_cnt) {
- lio_unmap_ring(octeon_dev->pci_dev,
- (u64)droq->desc_ring[idx].buffer_ptr,
- droq->buffer_size);
+ {
+ pg_info = &droq->recv_buf_list[idx].pg_info;
+
+ lio_unmap_ring(octeon_dev->pci_dev,
+ (u64)pg_info->dma);
+ pg_info->page = NULL;
+ pg_info->dma = 0;
+ }
recv_pkt->buffer_size[i] =
(bytes_left >=
@@ -449,6 +472,7 @@
void *buf = NULL;
u8 *data;
u32 desc_refilled = 0;
+ struct octeon_skb_page_info *pg_info;
desc_ring = droq->desc_ring;
@@ -458,13 +482,22 @@
* the buffer, else allocate.
*/
if (!droq->recv_buf_list[droq->refill_idx].buffer) {
- buf = recv_buffer_alloc(octeon_dev, droq->q_no,
- droq->buffer_size);
+ pg_info =
+ &droq->recv_buf_list[droq->refill_idx].pg_info;
+ /* Either recycle the existing pages or go for
+ * new page alloc
+ */
+ if (pg_info->page)
+ buf = recv_buffer_reuse(octeon_dev, pg_info);
+ else
+ buf = recv_buffer_alloc(octeon_dev, pg_info);
/* If a buffer could not be allocated, no point in
* continuing
*/
- if (!buf)
+ if (!buf) {
+ droq->stats.rx_alloc_failure++;
break;
+ }
droq->recv_buf_list[droq->refill_idx].buffer =
buf;
data = get_rbd(buf);
@@ -476,11 +509,8 @@
droq->recv_buf_list[droq->refill_idx].data = data;
desc_ring[droq->refill_idx].buffer_ptr =
- lio_map_ring(octeon_dev->pci_dev,
- droq->recv_buf_list[droq->
- refill_idx].buffer,
- droq->buffer_size);
-
+ lio_map_ring(droq->recv_buf_list[droq->
+ refill_idx].buffer);
/* Reset any previous values in the length field. */
droq->info_list[droq->refill_idx].length = 0;
@@ -586,6 +616,8 @@
for (pkt = 0; pkt < pkt_count; pkt++) {
u32 pkt_len = 0;
struct sk_buff *nicbuf = NULL;
+ struct octeon_skb_page_info *pg_info;
+ void *buf;
info = &droq->info_list[droq->read_idx];
octeon_swap_8B_data((u64 *)info, 2);
@@ -605,7 +637,6 @@
rh = &info->rh;
total_len += (u32)info->length;
-
if (OPCODE_SLOW_PATH(rh)) {
u32 buf_cnt;
@@ -614,50 +645,44 @@
droq->refill_count += buf_cnt;
} else {
if (info->length <= droq->buffer_size) {
- lio_unmap_ring(oct->pci_dev,
- (u64)droq->desc_ring[
- droq->read_idx].buffer_ptr,
- droq->buffer_size);
pkt_len = (u32)info->length;
nicbuf = droq->recv_buf_list[
droq->read_idx].buffer;
+ pg_info = &droq->recv_buf_list[
+ droq->read_idx].pg_info;
+ if (recv_buffer_recycle(oct, pg_info))
+ pg_info->page = NULL;
droq->recv_buf_list[droq->read_idx].buffer =
NULL;
INCR_INDEX_BY1(droq->read_idx, droq->max_count);
- skb_put(nicbuf, pkt_len);
droq->refill_count++;
} else {
- nicbuf = octeon_fast_packet_alloc(oct, droq,
- droq->q_no,
- (u32)
+ nicbuf = octeon_fast_packet_alloc((u32)
info->length);
pkt_len = 0;
/* nicbuf allocation can fail. We'll handle it
* inside the loop.
*/
while (pkt_len < info->length) {
- int cpy_len;
+ int cpy_len, idx = droq->read_idx;
- cpy_len = ((pkt_len +
- droq->buffer_size) >
- info->length) ?
+ cpy_len = ((pkt_len + droq->buffer_size)
+ > info->length) ?
((u32)info->length - pkt_len) :
droq->buffer_size;
if (nicbuf) {
- lio_unmap_ring(oct->pci_dev,
- (u64)
- droq->desc_ring
- [droq->read_idx].
- buffer_ptr,
- droq->
- buffer_size);
octeon_fast_packet_next(droq,
nicbuf,
cpy_len,
- droq->
- read_idx
- );
+ idx);
+ buf = droq->recv_buf_list[idx].
+ buffer;
+ recv_buffer_fast_free(buf);
+ droq->recv_buf_list[idx].buffer
+ = NULL;
+ } else {
+ droq->stats.rx_alloc_failure++;
}
pkt_len += cpy_len;
@@ -668,12 +693,14 @@
}
if (nicbuf) {
- if (droq->ops.fptr)
+ if (droq->ops.fptr) {
droq->ops.fptr(oct->octeon_id,
- nicbuf, pkt_len,
- rh, &droq->napi);
- else
+ nicbuf, pkt_len,
+ rh, &droq->napi,
+ droq->ops.farg);
+ } else {
recv_buffer_free(nicbuf);
+ }
}
}
@@ -681,16 +708,16 @@
int desc_refilled = octeon_droq_refill(oct, droq);
/* Flush the droq descriptor data to memory to be sure
- * that when we update the credits the data in memory
- * is accurate.
- */
+ * that when we update the credits the data in memory
+ * is accurate.
+ */
wmb();
writel((desc_refilled), droq->pkts_credit_reg);
/* make sure mmio write completes */
mmiowb();
}
- } /* for ( each packet )... */
+ } /* for (each packet)... */
/* Increment refill_count by the number of buffers processed. */
droq->stats.pkts_received += pkt;
@@ -937,6 +964,7 @@
spin_lock_irqsave(&droq->lock, flags);
droq->ops.fptr = NULL;
+ droq->ops.farg = NULL;
droq->ops.drop_on_max = 0;
spin_unlock_irqrestore(&droq->lock, flags);
@@ -949,6 +977,7 @@
u32 desc_size, void *app_ctx)
{
struct octeon_droq *droq;
+ int numa_node = cpu_to_node(q_no % num_online_cpus());
if (oct->droq[q_no]) {
dev_dbg(&oct->pci_dev->dev, "Droq already in use. Cannot create droq %d again\n",
@@ -957,7 +986,9 @@
}
/* Allocate the DS for the new droq. */
- droq = vmalloc(sizeof(*droq));
+ droq = vmalloc_node(sizeof(*droq), numa_node);
+ if (!droq)
+ droq = vmalloc(sizeof(*droq));
if (!droq)
goto create_droq_fail;
memset(droq, 0, sizeof(struct octeon_droq));
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
index 7940cce..1ca9c4f 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
@@ -65,6 +65,17 @@
#define OCT_DROQ_INFO_SIZE (sizeof(struct octeon_droq_info))
+struct octeon_skb_page_info {
+ /* DMA address for the page */
+ dma_addr_t dma;
+
+ /* Page for the rx dma **/
+ struct page *page;
+
+ /** which offset into page */
+ unsigned int page_offset;
+};
+
/** Pointer to data buffer.
* Driver keeps a pointer to the data buffer that it made available to
* the Octeon device. Since the descriptor ring keeps physical (bus)
@@ -77,6 +88,9 @@
/** Data in the packet buffer. */
u8 *data;
+
+ /** pg_info **/
+ struct octeon_skb_page_info pg_info;
};
#define OCT_DROQ_RECVBUF_SIZE (sizeof(struct octeon_recv_buffer))
@@ -106,6 +120,10 @@
/** Num of Packets dropped due to receive path failures. */
u64 rx_dropped;
+
+ /** Num of failures of recv_buffer_alloc() */
+ u64 rx_alloc_failure;
+
};
#define POLL_EVENT_INTR_ARRIVED 1
@@ -213,7 +231,8 @@
* data in the buffer. The receive header gives the port
* number to the caller. Function pointer is set by caller.
*/
- void (*fptr)(u32, void *, u32, union octeon_rh *, void *);
+ void (*fptr)(u32, void *, u32, union octeon_rh *, void *, void *);
+ void *farg;
/* This function will be called by the driver for all NAPI related
* events. The first param is the octeon id. The second param is the
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
index 592fe49..513f8a0 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
@@ -75,14 +75,16 @@
* a Octeon device has one such structure to represent it.
*/
struct octeon_instr_queue {
+ struct octeon_device *oct_dev;
+
/** A spinlock to protect access to the input ring. */
spinlock_t lock;
/** Flag that indicates if the queue uses 64 byte commands. */
u32 iqcmd_64B:1;
- /** Queue Number. */
- u32 iq_no:5;
+ /** Queue info. */
+ union oct_txpciq txpciq;
u32 rsvd:17;
@@ -147,6 +149,13 @@
/** Application context */
void *app_ctx;
+
+ /* network stack queue index */
+ int q_index;
+
+ /*os ifidx associated with this queue */
+ int ifidx;
+
};
/*---------------------- INSTRUCTION FORMAT ----------------------------*/
@@ -176,12 +185,12 @@
/** 64-byte instruction format.
* Format of instruction for a 64-byte mode input queue.
*/
-struct octeon_instr_64B {
+struct octeon_instr2_64B {
/** Pointer where the input data is available. */
u64 dptr;
/** Instruction Header. */
- u64 ih;
+ u64 ih2;
/** Input Request Header. */
u64 irh;
@@ -198,10 +207,40 @@
u64 rptr;
u64 reserved;
+};
+
+struct octeon_instr3_64B {
+ /** Pointer where the input data is available. */
+ u64 dptr;
+
+ /** Instruction Header. */
+ u64 ih3;
+
+ /** Instruction Header. */
+ u64 pki_ih3;
+
+ /** Input Request Header. */
+ u64 irh;
+
+ /** opcode/subcode specific parameters */
+ u64 ossp[2];
+
+ /** Return Data Parameters */
+ u64 rdp;
+
+ /** Pointer where the response for a RAW mode packet will be written
+ * by Octeon.
+ */
+ u64 rptr;
};
-#define OCT_64B_INSTR_SIZE (sizeof(struct octeon_instr_64B))
+union octeon_instr_64B {
+ struct octeon_instr2_64B cmd2;
+ struct octeon_instr3_64B cmd3;
+};
+
+#define OCT_64B_INSTR_SIZE (sizeof(union octeon_instr_64B))
/** The size of each buffer in soft command buffer pool
*/
@@ -214,7 +253,8 @@
u32 size;
/** Command and return status */
- struct octeon_instr_64B cmd;
+ union octeon_instr_64B cmd;
+
#define COMPLETION_WORD_INIT 0xffffffffffffffffULL
u64 *status_word;
@@ -268,14 +308,15 @@
/**
* octeon_init_instr_queue()
* @param octeon_dev - pointer to the octeon device structure.
- * @param iq_no - queue to be initialized (0 <= q_no <= 3).
+ * @param txpciq - queue to be initialized (0 <= q_no <= 3).
*
* Called at driver init time for each input queue. iq_conf has the
* configuration parameters for the queue.
*
* @return Success: 0 Failure: 1
*/
-int octeon_init_instr_queue(struct octeon_device *octeon_dev, u32 iq_no,
+int octeon_init_instr_queue(struct octeon_device *octeon_dev,
+ union oct_txpciq txpciq,
u32 num_descs);
/**
@@ -313,7 +354,8 @@
int octeon_send_soft_command(struct octeon_device *oct,
struct octeon_soft_command *sc);
-int octeon_setup_iq(struct octeon_device *oct, u32 iq_no,
- u32 num_descs, void *app_ctx);
+int octeon_setup_iq(struct octeon_device *oct, int ifidx,
+ int q_index, union oct_txpciq iq_no, u32 num_descs,
+ void *app_ctx);
#endif /* __OCTEON_IQ_H__ */
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_main.h b/drivers/net/ethernet/cavium/liquidio/octeon_main.h
index cbd0819..0ff3efc 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_main.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_main.h
@@ -126,22 +126,27 @@
}
static inline void *
-cnnic_alloc_aligned_dma(struct pci_dev *pci_dev,
- u32 size,
- u32 *alloc_size,
- size_t *orig_ptr,
- size_t *dma_addr __attribute__((unused)))
+cnnic_numa_alloc_aligned_dma(u32 size,
+ u32 *alloc_size,
+ size_t *orig_ptr,
+ int numa_node)
{
int retries = 0;
void *ptr = NULL;
#define OCTEON_MAX_ALLOC_RETRIES 1
do {
- ptr =
- (void *)__get_free_pages(GFP_KERNEL,
- get_order(size));
+ struct page *page = NULL;
+
+ page = alloc_pages_node(numa_node,
+ GFP_KERNEL,
+ get_order(size));
+ if (!page)
+ page = alloc_pages(GFP_KERNEL,
+ get_order(size));
+ ptr = (void *)page_address(page);
if ((unsigned long)ptr & 0x07) {
- free_pages((unsigned long)ptr, get_order(size));
+ __free_pages(page, get_order(size));
ptr = NULL;
/* Increment the size required if the first
* attempt failed.
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index b3abe58..9c14484 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -48,11 +48,11 @@
*/
int rxq;
- /** Guards the glist */
- spinlock_t lock;
+ /** Guards each glist */
+ spinlock_t *glist_lock;
- /** Linked list of gather components */
- struct list_head glist;
+ /** Array of gather component linked lists */
+ struct list_head *glist;
/** Pointer to the NIC properties for the Octeon device this network
* interface is associated with.
@@ -67,6 +67,9 @@
/** Link information sent by the core application for this interface. */
struct oct_link_info linfo;
+ /** counter of link changes */
+ u64 link_changes;
+
/** Size of Tx queue for this octeon device. */
u32 tx_qsize;
@@ -111,8 +114,9 @@
* \brief Enable or disable feature
* @param netdev pointer to network device
* @param cmd Command that just requires acknowledgment
+ * @param param1 Parameter to command
*/
-int liquidio_set_feature(struct net_device *netdev, int cmd);
+int liquidio_set_feature(struct net_device *netdev, int cmd, u16 param1);
/**
* \brief Link control command completion callback
@@ -131,14 +135,30 @@
*/
void liquidio_set_ethtool_ops(struct net_device *netdev);
-static inline void
-*recv_buffer_alloc(struct octeon_device *oct __attribute__((unused)),
- u32 q_no __attribute__((unused)), u32 size)
-{
#define SKB_ADJ_MASK 0x3F
#define SKB_ADJ (SKB_ADJ_MASK + 1)
- struct sk_buff *skb = dev_alloc_skb(size + SKB_ADJ);
+#define MIN_SKB_SIZE 256 /* 8 bytes and more - 8 bytes for PTP */
+#define LIO_RXBUFFER_SZ 2048
+
+static inline void
+*recv_buffer_alloc(struct octeon_device *oct,
+ struct octeon_skb_page_info *pg_info)
+{
+ struct page *page;
+ struct sk_buff *skb;
+ struct octeon_skb_page_info *skb_pg_info;
+
+ page = alloc_page(GFP_ATOMIC | __GFP_COLD);
+ if (unlikely(!page))
+ return NULL;
+
+ skb = dev_alloc_skb(MIN_SKB_SIZE + SKB_ADJ);
+ if (unlikely(!skb)) {
+ __free_page(page);
+ pg_info->page = NULL;
+ return NULL;
+ }
if ((unsigned long)skb->data & SKB_ADJ_MASK) {
u32 r = SKB_ADJ - ((unsigned long)skb->data & SKB_ADJ_MASK);
@@ -146,11 +166,151 @@
skb_reserve(skb, r);
}
+ skb_pg_info = ((struct octeon_skb_page_info *)(skb->cb));
+ /* Get DMA info */
+ pg_info->dma = dma_map_page(&oct->pci_dev->dev, page, 0,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+
+ /* Mapping failed!! */
+ if (dma_mapping_error(&oct->pci_dev->dev, pg_info->dma)) {
+ __free_page(page);
+ dev_kfree_skb_any((struct sk_buff *)skb);
+ pg_info->page = NULL;
+ return NULL;
+ }
+
+ pg_info->page = page;
+ pg_info->page_offset = 0;
+ skb_pg_info->page = page;
+ skb_pg_info->page_offset = 0;
+ skb_pg_info->dma = pg_info->dma;
+
return (void *)skb;
}
+static inline void
+*recv_buffer_fast_alloc(u32 size)
+{
+ struct sk_buff *skb;
+ struct octeon_skb_page_info *skb_pg_info;
+
+ skb = dev_alloc_skb(size + SKB_ADJ);
+ if (unlikely(!skb))
+ return NULL;
+
+ if ((unsigned long)skb->data & SKB_ADJ_MASK) {
+ u32 r = SKB_ADJ - ((unsigned long)skb->data & SKB_ADJ_MASK);
+
+ skb_reserve(skb, r);
+ }
+
+ skb_pg_info = ((struct octeon_skb_page_info *)(skb->cb));
+ skb_pg_info->page = NULL;
+ skb_pg_info->page_offset = 0;
+ skb_pg_info->dma = 0;
+
+ return skb;
+}
+
+static inline int
+recv_buffer_recycle(struct octeon_device *oct, void *buf)
+{
+ struct octeon_skb_page_info *pg_info = buf;
+
+ if (!pg_info->page) {
+ dev_err(&oct->pci_dev->dev, "%s: pg_info->page NULL\n",
+ __func__);
+ return -ENOMEM;
+ }
+
+ if (unlikely(page_count(pg_info->page) != 1) ||
+ unlikely(page_to_nid(pg_info->page) != numa_node_id())) {
+ dma_unmap_page(&oct->pci_dev->dev,
+ pg_info->dma, (PAGE_SIZE << 0),
+ DMA_FROM_DEVICE);
+ pg_info->dma = 0;
+ pg_info->page = NULL;
+ pg_info->page_offset = 0;
+ return -ENOMEM;
+ }
+
+ /* Flip to other half of the buffer */
+ if (pg_info->page_offset == 0)
+ pg_info->page_offset = LIO_RXBUFFER_SZ;
+ else
+ pg_info->page_offset = 0;
+ page_ref_inc(pg_info->page);
+
+ return 0;
+}
+
+static inline void
+*recv_buffer_reuse(struct octeon_device *oct, void *buf)
+{
+ struct octeon_skb_page_info *pg_info = buf, *skb_pg_info;
+ struct sk_buff *skb;
+
+ skb = dev_alloc_skb(MIN_SKB_SIZE + SKB_ADJ);
+ if (unlikely(!skb)) {
+ dma_unmap_page(&oct->pci_dev->dev,
+ pg_info->dma, (PAGE_SIZE << 0),
+ DMA_FROM_DEVICE);
+ return NULL;
+ }
+
+ if ((unsigned long)skb->data & SKB_ADJ_MASK) {
+ u32 r = SKB_ADJ - ((unsigned long)skb->data & SKB_ADJ_MASK);
+
+ skb_reserve(skb, r);
+ }
+
+ skb_pg_info = ((struct octeon_skb_page_info *)(skb->cb));
+ skb_pg_info->page = pg_info->page;
+ skb_pg_info->page_offset = pg_info->page_offset;
+ skb_pg_info->dma = pg_info->dma;
+
+ return skb;
+}
+
+static inline void
+recv_buffer_destroy(void *buffer, struct octeon_skb_page_info *pg_info)
+{
+ struct sk_buff *skb = (struct sk_buff *)buffer;
+
+ put_page(pg_info->page);
+ pg_info->dma = 0;
+ pg_info->page = NULL;
+ pg_info->page_offset = 0;
+
+ if (skb)
+ dev_kfree_skb_any(skb);
+}
+
static inline void recv_buffer_free(void *buffer)
{
+ struct sk_buff *skb = (struct sk_buff *)buffer;
+ struct octeon_skb_page_info *pg_info;
+
+ pg_info = ((struct octeon_skb_page_info *)(skb->cb));
+
+ if (pg_info->page) {
+ put_page(pg_info->page);
+ pg_info->dma = 0;
+ pg_info->page = NULL;
+ pg_info->page_offset = 0;
+ }
+
+ dev_kfree_skb_any((struct sk_buff *)buffer);
+}
+
+static inline void
+recv_buffer_fast_free(void *buffer)
+{
+ dev_kfree_skb_any((struct sk_buff *)buffer);
+}
+
+static inline void tx_buffer_free(void *buffer)
+{
dev_kfree_skb_any((struct sk_buff *)buffer);
}
@@ -159,7 +319,17 @@
#define lio_dma_free(oct, size, virt_addr, dma_addr) \
dma_free_coherent(&oct->pci_dev->dev, size, virt_addr, dma_addr)
-#define get_rbd(ptr) (((struct sk_buff *)(ptr))->data)
+static inline
+void *get_rbd(struct sk_buff *skb)
+{
+ struct octeon_skb_page_info *pg_info;
+ unsigned char *va;
+
+ pg_info = ((struct octeon_skb_page_info *)(skb->cb));
+ va = page_address(pg_info->page) + pg_info->page_offset;
+
+ return va;
+}
static inline u64
lio_map_ring_info(struct octeon_droq *droq, u32 i)
@@ -183,33 +353,44 @@
}
static inline u64
-lio_map_ring(struct pci_dev *pci_dev,
- void *buf, u32 size)
+lio_map_ring(void *buf)
{
dma_addr_t dma_addr;
- dma_addr = dma_map_single(&pci_dev->dev, get_rbd(buf), size,
- DMA_FROM_DEVICE);
+ struct sk_buff *skb = (struct sk_buff *)buf;
+ struct octeon_skb_page_info *pg_info;
- BUG_ON(dma_mapping_error(&pci_dev->dev, dma_addr));
+ pg_info = ((struct octeon_skb_page_info *)(skb->cb));
+ if (!pg_info->page) {
+ pr_err("%s: pg_info->page NULL\n", __func__);
+ WARN_ON(1);
+ }
+
+ /* Get DMA info */
+ dma_addr = pg_info->dma;
+ if (!pg_info->dma) {
+ pr_err("%s: ERROR it should be already available\n",
+ __func__);
+ WARN_ON(1);
+ }
+ dma_addr += pg_info->page_offset;
return (u64)dma_addr;
}
static inline void
lio_unmap_ring(struct pci_dev *pci_dev,
- u64 buf_ptr, u32 size)
+ u64 buf_ptr)
+
{
- dma_unmap_single(&pci_dev->dev,
- buf_ptr, size,
- DMA_FROM_DEVICE);
+ dma_unmap_page(&pci_dev->dev,
+ buf_ptr, (PAGE_SIZE << 0),
+ DMA_FROM_DEVICE);
}
-static inline void *octeon_fast_packet_alloc(struct octeon_device *oct,
- struct octeon_droq *droq,
- u32 q_no, u32 size)
+static inline void *octeon_fast_packet_alloc(u32 size)
{
- return recv_buffer_alloc(oct, q_no, size);
+ return recv_buffer_fast_alloc(size);
}
static inline void octeon_fast_packet_next(struct octeon_droq *droq,
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c
index 1a01915..7843b8a 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c
@@ -44,11 +44,11 @@
void *
octeon_alloc_soft_command_resp(struct octeon_device *oct,
- struct octeon_instr_64B *cmd,
- size_t rdatasize)
+ union octeon_instr_64B *cmd,
+ u32 rdatasize)
{
struct octeon_soft_command *sc;
- struct octeon_instr_ih *ih;
+ struct octeon_instr_ih2 *ih2;
struct octeon_instr_irh *irh;
struct octeon_instr_rdp *rdp;
@@ -59,24 +59,25 @@
return NULL;
/* Copy existing command structure into the soft command */
- memcpy(&sc->cmd, cmd, sizeof(struct octeon_instr_64B));
+ memcpy(&sc->cmd, cmd, sizeof(union octeon_instr_64B));
/* Add in the response related fields. Opcode and Param are already
* there.
*/
- ih = (struct octeon_instr_ih *)&sc->cmd.ih;
- ih->fsz = 40; /* irh + ossp[0] + ossp[1] + rdp + rptr = 40 bytes */
+ ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
+ rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
+ irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
+ ih2->fsz = 40; /* irh + ossp[0] + ossp[1] + rdp + rptr = 40 bytes */
- irh = (struct octeon_instr_irh *)&sc->cmd.irh;
irh->rflag = 1; /* a response is required */
- irh->len = 4; /* means four 64-bit words immediately follow irh */
- rdp = (struct octeon_instr_rdp *)&sc->cmd.rdp;
rdp->pcie_port = oct->pcie_port;
rdp->rlen = rdatasize;
*sc->status_word = COMPLETION_WORD_INIT;
+ sc->cmd.cmd2.rptr = sc->dmarptr;
+
sc->wait_time = 1000;
sc->timeout = jiffies + sc->wait_time;
@@ -119,12 +120,11 @@
static inline struct octeon_soft_command
*octnic_alloc_ctrl_pkt_sc(struct octeon_device *oct,
- struct octnic_ctrl_pkt *nctrl,
- struct octnic_ctrl_params nparams)
+ struct octnic_ctrl_pkt *nctrl)
{
struct octeon_soft_command *sc = NULL;
u8 *data;
- size_t rdatasize;
+ u32 rdatasize;
u32 uddsize = 0, datasize = 0;
uddsize = (u32)(nctrl->ncmd.s.more * 8);
@@ -143,7 +143,7 @@
data = (u8 *)sc->virtdptr;
- memcpy(data, &nctrl->ncmd, OCTNET_CMD_SIZE);
+ memcpy(data, &nctrl->ncmd, OCTNET_CMD_SIZE);
octeon_swap_8B_data((u64 *)data, (OCTNET_CMD_SIZE >> 3));
@@ -152,6 +152,8 @@
memcpy(data + OCTNET_CMD_SIZE, nctrl->udd, uddsize);
}
+ sc->iq_no = (u32)nctrl->iq_no;
+
octeon_prepare_soft_command(oct, sc, OPCODE_NIC, OPCODE_NIC_CMD,
0, 0, 0);
@@ -164,13 +166,12 @@
int
octnet_send_nic_ctrl_pkt(struct octeon_device *oct,
- struct octnic_ctrl_pkt *nctrl,
- struct octnic_ctrl_params nparams)
+ struct octnic_ctrl_pkt *nctrl)
{
int retval;
struct octeon_soft_command *sc = NULL;
- sc = octnic_alloc_ctrl_pkt_sc(oct, nctrl, nparams);
+ sc = octnic_alloc_ctrl_pkt_sc(oct, nctrl);
if (!sc) {
dev_err(&oct->pci_dev->dev, "%s soft command alloc failed\n",
__func__);
@@ -178,7 +179,7 @@
}
retval = octeon_send_soft_command(oct, sc);
- if (retval) {
+ if (retval == IQ_SEND_FAILED) {
octeon_free_soft_command(oct, sc);
dev_err(&oct->pci_dev->dev, "%s soft command send failed status: %x\n",
__func__, retval);
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_nic.h b/drivers/net/ethernet/cavium/liquidio/octeon_nic.h
index 0238857..b71a2bb 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_nic.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_nic.h
@@ -52,6 +52,9 @@
/** Additional data that may be needed by some commands. */
u64 udd[MAX_NCTRL_UDD];
+ /** Input queue to use to send this command. */
+ u64 iq_no;
+
/** Time to wait for Octeon software to respond to this control command.
* If wait_time is 0, OSI assumes no response is expected.
*/
@@ -82,7 +85,7 @@
u32 datasize;
/** Command to be passed to the Octeon device software. */
- struct octeon_instr_64B cmd;
+ union octeon_instr_64B cmd;
/** Input queue to use to send this command. */
u32 q_no;
@@ -94,15 +97,14 @@
*/
union octnic_cmd_setup {
struct {
- u32 ifidx:8;
- u32 cksum_offset:7;
+ u32 iq_no:8;
u32 gather:1;
u32 timestamp:1;
- u32 ipv4opts_ipv6exthdr:2;
u32 ip_csum:1;
+ u32 transport_csum:1;
u32 tnl_csum:1;
+ u32 rsvd:19;
- u32 rsvd:11;
union {
u32 datasize;
u32 gatherptrs;
@@ -113,16 +115,129 @@
};
-struct octnic_ctrl_params {
- u32 resp_order;
-};
-
static inline int octnet_iq_is_full(struct octeon_device *oct, u32 q_no)
{
return ((u32)atomic_read(&oct->instr_queue[q_no]->instr_pending)
>= (oct->instr_queue[q_no]->max_count - 2));
}
+static inline void
+octnet_prepare_pci_cmd_o2(struct octeon_device *oct,
+ union octeon_instr_64B *cmd,
+ union octnic_cmd_setup *setup, u32 tag)
+{
+ struct octeon_instr_ih2 *ih2;
+ struct octeon_instr_irh *irh;
+ union octnic_packet_params packet_params;
+ int port;
+
+ memset(cmd, 0, sizeof(union octeon_instr_64B));
+
+ ih2 = (struct octeon_instr_ih2 *)&cmd->cmd2.ih2;
+
+ /* assume that rflag is cleared so therefore front data will only have
+ * irh and ossp[0], ossp[1] for a total of 32 bytes
+ */
+ ih2->fsz = 24;
+
+ ih2->tagtype = ORDERED_TAG;
+ ih2->grp = DEFAULT_POW_GRP;
+
+ port = (int)oct->instr_queue[setup->s.iq_no]->txpciq.s.port;
+
+ if (tag)
+ ih2->tag = tag;
+ else
+ ih2->tag = LIO_DATA(port);
+
+ ih2->raw = 1;
+ ih2->qos = (port & 3) + 4; /* map qos based on interface */
+
+ if (!setup->s.gather) {
+ ih2->dlengsz = setup->s.u.datasize;
+ } else {
+ ih2->gather = 1;
+ ih2->dlengsz = setup->s.u.gatherptrs;
+ }
+
+ irh = (struct octeon_instr_irh *)&cmd->cmd2.irh;
+
+ irh->opcode = OPCODE_NIC;
+ irh->subcode = OPCODE_NIC_NW_DATA;
+
+ packet_params.u32 = 0;
+
+ packet_params.s.ip_csum = setup->s.ip_csum;
+ packet_params.s.transport_csum = setup->s.transport_csum;
+ packet_params.s.tnl_csum = setup->s.tnl_csum;
+ packet_params.s.tsflag = setup->s.timestamp;
+
+ irh->ossp = packet_params.u32;
+}
+
+static inline void
+octnet_prepare_pci_cmd_o3(struct octeon_device *oct,
+ union octeon_instr_64B *cmd,
+ union octnic_cmd_setup *setup, u32 tag)
+{
+ struct octeon_instr_irh *irh;
+ struct octeon_instr_ih3 *ih3;
+ struct octeon_instr_pki_ih3 *pki_ih3;
+ union octnic_packet_params packet_params;
+ int port;
+
+ memset(cmd, 0, sizeof(union octeon_instr_64B));
+
+ ih3 = (struct octeon_instr_ih3 *)&cmd->cmd3.ih3;
+ pki_ih3 = (struct octeon_instr_pki_ih3 *)&cmd->cmd3.pki_ih3;
+
+ /* assume that rflag is cleared so therefore front data will only have
+ * irh and ossp[1] and ossp[2] for a total of 24 bytes
+ */
+ ih3->pkind = oct->instr_queue[setup->s.iq_no]->txpciq.s.pkind;
+ /*PKI IH*/
+ ih3->fsz = 24 + 8;
+
+ if (!setup->s.gather) {
+ ih3->dlengsz = setup->s.u.datasize;
+ } else {
+ ih3->gather = 1;
+ ih3->dlengsz = setup->s.u.gatherptrs;
+ }
+
+ pki_ih3->w = 1;
+ pki_ih3->raw = 1;
+ pki_ih3->utag = 1;
+ pki_ih3->utt = 1;
+ pki_ih3->uqpg = oct->instr_queue[setup->s.iq_no]->txpciq.s.use_qpg;
+
+ port = (int)oct->instr_queue[setup->s.iq_no]->txpciq.s.port;
+
+ if (tag)
+ pki_ih3->tag = tag;
+ else
+ pki_ih3->tag = LIO_DATA(port);
+
+ pki_ih3->tagtype = ORDERED_TAG;
+ pki_ih3->qpg = oct->instr_queue[setup->s.iq_no]->txpciq.s.qpg;
+ pki_ih3->pm = 0x7; /*0x7 - meant for Parse nothing, uninterpreted*/
+ pki_ih3->sl = 8; /* sl will be sizeof(pki_ih3)*/
+
+ irh = (struct octeon_instr_irh *)&cmd->cmd3.irh;
+
+ irh->opcode = OPCODE_NIC;
+ irh->subcode = OPCODE_NIC_NW_DATA;
+
+ packet_params.u32 = 0;
+
+ packet_params.s.ip_csum = setup->s.ip_csum;
+ packet_params.s.transport_csum = setup->s.transport_csum;
+ packet_params.s.tnl_csum = setup->s.tnl_csum;
+ packet_params.s.tsflag = setup->s.timestamp;
+
+ irh->ossp = packet_params.u32;
+}
+
/** Utility function to prepare a 64B NIC instruction based on a setup command
* @param cmd - pointer to instruction to be filled in.
* @param setup - pointer to the setup structure
@@ -131,59 +246,13 @@
* Assumes the cmd instruction is pre-allocated, but no fields are filled in.
*/
static inline void
-octnet_prepare_pci_cmd(struct octeon_instr_64B *cmd,
+octnet_prepare_pci_cmd(struct octeon_device *oct, union octeon_instr_64B *cmd,
union octnic_cmd_setup *setup, u32 tag)
{
- struct octeon_instr_ih *ih;
- struct octeon_instr_irh *irh;
- union octnic_packet_params packet_params;
-
- memset(cmd, 0, sizeof(struct octeon_instr_64B));
-
- ih = (struct octeon_instr_ih *)&cmd->ih;
-
- /* assume that rflag is cleared so therefore front data will only have
- * irh and ossp[1] and ossp[2] for a total of 24 bytes
- */
- ih->fsz = 24;
-
- ih->tagtype = ORDERED_TAG;
- ih->grp = DEFAULT_POW_GRP;
-
- if (tag)
- ih->tag = tag;
+ if (OCTEON_CN6XXX(oct))
+ octnet_prepare_pci_cmd_o2(oct, cmd, setup, tag);
else
- ih->tag = LIO_DATA(setup->s.ifidx);
-
- ih->raw = 1;
- ih->qos = (setup->s.ifidx & 3) + 4; /* map qos based on interface */
-
- if (!setup->s.gather) {
- ih->dlengsz = setup->s.u.datasize;
- } else {
- ih->gather = 1;
- ih->dlengsz = setup->s.u.gatherptrs;
- }
-
- irh = (struct octeon_instr_irh *)&cmd->irh;
-
- irh->opcode = OPCODE_NIC;
- irh->subcode = OPCODE_NIC_NW_DATA;
-
- packet_params.u32 = 0;
-
- if (setup->s.cksum_offset) {
- packet_params.s.csoffset = setup->s.cksum_offset;
- packet_params.s.ipv4opts_ipv6exthdr =
- setup->s.ipv4opts_ipv6exthdr;
- }
-
- packet_params.s.ip_csum = setup->s.ip_csum;
- packet_params.s.tnl_csum = setup->s.tnl_csum;
- packet_params.s.ifidx = setup->s.ifidx;
- packet_params.s.tsflag = setup->s.timestamp;
-
- irh->ossp = packet_params.u32;
+ octnet_prepare_pci_cmd_o3(oct, cmd, setup, tag);
}
/** Allocate and a soft command with space for a response immediately following
@@ -198,8 +267,8 @@
*/
void *
octeon_alloc_soft_command_resp(struct octeon_device *oct,
- struct octeon_instr_64B *cmd,
- size_t rdatasize);
+ union octeon_instr_64B *cmd,
+ u32 rdatasize);
/** Send a NIC data packet to the device
* @param oct - octeon device pointer
@@ -214,14 +283,11 @@
/** Send a NIC control packet to the device
* @param oct - octeon device pointer
* @param nctrl - control structure with command, timout, and callback info
- * @param nparams - response control structure
- *
* @returns IQ_FAILED if it failed to add to the input queue. IQ_STOP if it the
* queue should be stopped, and IQ_SEND_OK if it sent okay.
*/
int
octnet_send_nic_ctrl_pkt(struct octeon_device *oct,
- struct octnic_ctrl_pkt *nctrl,
- struct octnic_ctrl_params nparams);
+ struct octnic_ctrl_pkt *nctrl);
#endif
diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c
index 9313915..8649677 100644
--- a/drivers/net/ethernet/cavium/liquidio/request_manager.c
+++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c
@@ -69,12 +69,16 @@
/* Return 0 on success, 1 on failure */
int octeon_init_instr_queue(struct octeon_device *oct,
- u32 iq_no, u32 num_descs)
+ union oct_txpciq txpciq,
+ u32 num_descs)
{
struct octeon_instr_queue *iq;
struct octeon_iq_config *conf = NULL;
+ u32 iq_no = (u32)txpciq.s.q_no;
u32 q_size;
struct cavium_wq *db_wq;
+ int orig_node = dev_to_node(&oct->pci_dev->dev);
+ int numa_node = cpu_to_node(iq_no % num_online_cpus());
if (OCTEON_CN6XXX(oct))
conf = &(CFG_GET_IQ_CFG(CHIP_FIELD(oct, cn6xxx, conf)));
@@ -95,9 +99,15 @@
q_size = (u32)conf->instr_type * num_descs;
iq = oct->instr_queue[iq_no];
+ iq->oct_dev = oct;
+ set_dev_node(&oct->pci_dev->dev, numa_node);
iq->base_addr = lio_dma_alloc(oct, q_size,
(dma_addr_t *)&iq->base_addr_dma);
+ set_dev_node(&oct->pci_dev->dev, orig_node);
+ if (!iq->base_addr)
+ iq->base_addr = lio_dma_alloc(oct, q_size,
+ (dma_addr_t *)&iq->base_addr_dma);
if (!iq->base_addr) {
dev_err(&oct->pci_dev->dev, "Cannot allocate memory for instr queue %d\n",
iq_no);
@@ -109,7 +119,11 @@
/* Initialize a list to holds requests that have been posted to Octeon
* but has yet to be fetched by octeon
*/
- iq->request_list = vmalloc(sizeof(*iq->request_list) * num_descs);
+ iq->request_list = vmalloc_node((sizeof(*iq->request_list) * num_descs),
+ numa_node);
+ if (!iq->request_list)
+ iq->request_list = vmalloc(sizeof(*iq->request_list) *
+ num_descs);
if (!iq->request_list) {
lio_dma_free(oct, q_size, iq->base_addr, iq->base_addr_dma);
dev_err(&oct->pci_dev->dev, "Alloc failed for IQ[%d] nr free list\n",
@@ -122,7 +136,7 @@
dev_dbg(&oct->pci_dev->dev, "IQ[%d]: base: %p basedma: %llx count: %d\n",
iq_no, iq->base_addr, iq->base_addr_dma, iq->max_count);
- iq->iq_no = iq_no;
+ iq->txpciq.u64 = txpciq.u64;
iq->fill_threshold = (u32)conf->db_min;
iq->fill_cnt = 0;
iq->host_write_index = 0;
@@ -189,26 +203,38 @@
/* Return 0 on success, 1 on failure */
int octeon_setup_iq(struct octeon_device *oct,
- u32 iq_no,
+ int ifidx,
+ int q_index,
+ union oct_txpciq txpciq,
u32 num_descs,
void *app_ctx)
{
+ u32 iq_no = (u32)txpciq.s.q_no;
+ int numa_node = cpu_to_node(iq_no % num_online_cpus());
+
if (oct->instr_queue[iq_no]) {
dev_dbg(&oct->pci_dev->dev, "IQ is in use. Cannot create the IQ: %d again\n",
iq_no);
+ oct->instr_queue[iq_no]->txpciq.u64 = txpciq.u64;
oct->instr_queue[iq_no]->app_ctx = app_ctx;
return 0;
}
oct->instr_queue[iq_no] =
- vmalloc(sizeof(struct octeon_instr_queue));
+ vmalloc_node(sizeof(struct octeon_instr_queue), numa_node);
+ if (!oct->instr_queue[iq_no])
+ oct->instr_queue[iq_no] =
+ vmalloc(sizeof(struct octeon_instr_queue));
if (!oct->instr_queue[iq_no])
return 1;
memset(oct->instr_queue[iq_no], 0,
sizeof(struct octeon_instr_queue));
+ oct->instr_queue[iq_no]->q_index = q_index;
oct->instr_queue[iq_no]->app_ctx = app_ctx;
- if (octeon_init_instr_queue(oct, iq_no, num_descs)) {
+ oct->instr_queue[iq_no]->ifidx = ifidx;
+
+ if (octeon_init_instr_queue(oct, txpciq, num_descs)) {
vfree(oct->instr_queue[iq_no]);
oct->instr_queue[iq_no] = NULL;
return 1;
@@ -395,7 +421,7 @@
case REQTYPE_SOFT_COMMAND:
sc = buf;
- irh = (struct octeon_instr_irh *)&sc->cmd.irh;
+ irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
if (irh->rflag) {
/* We're expecting a response from Octeon.
* It's up to lio_process_ordered_list() to
@@ -558,7 +584,7 @@
u64 ossp1)
{
struct octeon_config *oct_cfg;
- struct octeon_instr_ih *ih;
+ struct octeon_instr_ih2 *ih2;
struct octeon_instr_irh *irh;
struct octeon_instr_rdp *rdp;
@@ -567,73 +593,69 @@
oct_cfg = octeon_get_conf(oct);
- ih = (struct octeon_instr_ih *)&sc->cmd.ih;
- ih->tagtype = ATOMIC_TAG;
- ih->tag = LIO_CONTROL;
- ih->raw = 1;
- ih->grp = CFG_GET_CTRL_Q_GRP(oct_cfg);
+ ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
+ ih2->tagtype = ATOMIC_TAG;
+ ih2->tag = LIO_CONTROL;
+ ih2->raw = 1;
+ ih2->grp = CFG_GET_CTRL_Q_GRP(oct_cfg);
if (sc->datasize) {
- ih->dlengsz = sc->datasize;
- ih->rs = 1;
+ ih2->dlengsz = sc->datasize;
+ ih2->rs = 1;
}
- irh = (struct octeon_instr_irh *)&sc->cmd.irh;
+ irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
irh->opcode = opcode;
irh->subcode = subcode;
/* opcode/subcode specific parameters (ossp) */
irh->ossp = irh_ossp;
- sc->cmd.ossp[0] = ossp0;
- sc->cmd.ossp[1] = ossp1;
+ sc->cmd.cmd2.ossp[0] = ossp0;
+ sc->cmd.cmd2.ossp[1] = ossp1;
if (sc->rdatasize) {
- rdp = (struct octeon_instr_rdp *)&sc->cmd.rdp;
+ rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
rdp->pcie_port = oct->pcie_port;
rdp->rlen = sc->rdatasize;
irh->rflag = 1;
- irh->len = 4;
- ih->fsz = 40; /* irh+ossp[0]+ossp[1]+rdp+rptr = 40 bytes */
+ ih2->fsz = 40; /* irh+ossp[0]+ossp[1]+rdp+rptr = 40 bytes */
} else {
irh->rflag = 0;
- irh->len = 2;
- ih->fsz = 24; /* irh + ossp[0] + ossp[1] = 24 bytes */
+ ih2->fsz = 24; /* irh + ossp[0] + ossp[1] = 24 bytes */
}
-
- while (!(oct->io_qmask.iq & (1 << sc->iq_no)))
- sc->iq_no++;
}
int octeon_send_soft_command(struct octeon_device *oct,
struct octeon_soft_command *sc)
{
- struct octeon_instr_ih *ih;
+ struct octeon_instr_ih2 *ih2;
struct octeon_instr_irh *irh;
struct octeon_instr_rdp *rdp;
+ u32 len;
- ih = (struct octeon_instr_ih *)&sc->cmd.ih;
- if (ih->dlengsz) {
- BUG_ON(!sc->dmadptr);
- sc->cmd.dptr = sc->dmadptr;
+ ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
+ if (ih2->dlengsz) {
+ WARN_ON(!sc->dmadptr);
+ sc->cmd.cmd2.dptr = sc->dmadptr;
}
-
- irh = (struct octeon_instr_irh *)&sc->cmd.irh;
+ irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
if (irh->rflag) {
BUG_ON(!sc->dmarptr);
BUG_ON(!sc->status_word);
*sc->status_word = COMPLETION_WORD_INIT;
- rdp = (struct octeon_instr_rdp *)&sc->cmd.rdp;
+ rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
- sc->cmd.rptr = sc->dmarptr;
+ sc->cmd.cmd2.rptr = sc->dmarptr;
}
+ len = (u32)ih2->dlengsz;
if (sc->wait_time)
sc->timeout = jiffies + sc->wait_time;
- return octeon_send_command(oct, sc->iq_no, 1, &sc->cmd, sc,
- (u32)ih->dlengsz, REQTYPE_SOFT_COMMAND);
+ return (octeon_send_command(oct, sc->iq_no, 1, &sc->cmd, sc,
+ len, REQTYPE_SOFT_COMMAND));
}
int octeon_setup_sc_buffer_pool(struct octeon_device *oct)
diff --git a/drivers/net/ethernet/cavium/liquidio/response_manager.c b/drivers/net/ethernet/cavium/liquidio/response_manager.c
index 6287a7c..e2e9103 100644
--- a/drivers/net/ethernet/cavium/liquidio/response_manager.c
+++ b/drivers/net/ethernet/cavium/liquidio/response_manager.c
@@ -85,6 +85,7 @@
u32 status;
u64 status64;
struct octeon_instr_rdp *rdp;
+ u64 rptr;
ordered_sc_list = &octeon_dev->response_list[OCTEON_ORDERED_SC_LIST];
@@ -102,7 +103,8 @@
sc = (struct octeon_soft_command *)ordered_sc_list->
head.next;
- rdp = (struct octeon_instr_rdp *)&sc->cmd.rdp;
+ rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
+ rptr = sc->cmd.cmd2.rptr;
status = OCTEON_REQUEST_PENDING;
@@ -110,7 +112,7 @@
* to where rptr is pointing to
*/
dma_sync_single_for_cpu(&octeon_dev->pci_dev->dev,
- sc->cmd.rptr, rdp->rlen,
+ rptr, rdp->rlen,
DMA_FROM_DEVICE);
status64 = *sc->status_word;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 477db47..c45de49 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -64,6 +64,7 @@
#include <net/bonding.h>
#include <net/addrconf.h>
#include <asm/uaccess.h>
+#include <linux/crash_dump.h>
#include "cxgb4.h"
#include "t4_regs.h"
@@ -206,7 +207,7 @@
static unsigned int num_vf[NUM_OF_PF_WITH_SRIOV];
module_param_array(num_vf, uint, NULL, 0644);
-MODULE_PARM_DESC(num_vf, "number of VFs for each of PFs 0-3");
+MODULE_PARM_DESC(num_vf, "number of VFs for each of PFs 0-3, deprecated parameter - please use the pci sysfs interface.");
#endif
/* TX Queue select used to determine what algorithm to use for selecting TX
@@ -460,11 +461,8 @@
struct port_info *pi = netdev_priv(dev);
struct adapter *adapter = pi->adapter;
- if (!(dev->flags & IFF_PROMISC)) {
- __dev_uc_sync(dev, cxgb4_mac_sync, cxgb4_mac_unsync);
- if (!(dev->flags & IFF_ALLMULTI))
- __dev_mc_sync(dev, cxgb4_mac_sync, cxgb4_mac_unsync);
- }
+ __dev_uc_sync(dev, cxgb4_mac_sync, cxgb4_mac_unsync);
+ __dev_mc_sync(dev, cxgb4_mac_sync, cxgb4_mac_unsync);
return t4_set_rxmode(adapter, adapter->mbox, pi->viid, mtu,
(dev->flags & IFF_PROMISC) ? 1 : 0,
@@ -3735,7 +3733,8 @@
return ret;
/* Contact FW, advertising Master capability */
- ret = t4_fw_hello(adap, adap->mbox, adap->mbox, MASTER_MAY, &state);
+ ret = t4_fw_hello(adap, adap->mbox, adap->mbox,
+ is_kdump_kernel() ? MASTER_MUST : MASTER_MAY, &state);
if (ret < 0) {
dev_err(adap->pdev_dev, "could not connect to FW, error %d\n",
ret);
@@ -4366,6 +4365,11 @@
if (q10g > netif_get_num_default_rss_queues())
q10g = netif_get_num_default_rss_queues();
+ /* Reduce memory usage in kdump environment, disable all offload.
+ */
+ if (is_kdump_kernel())
+ adap->params.offload = 0;
+
for_each_port(adap, i) {
struct port_info *pi = adap2pinfo(adap, i);
@@ -4829,6 +4833,60 @@
return -EINVAL;
}
+#ifdef CONFIG_PCI_IOV
+static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs)
+{
+ int err = 0;
+ int current_vfs = pci_num_vf(pdev);
+ u32 pcie_fw;
+ void __iomem *regs;
+
+ regs = pci_ioremap_bar(pdev, 0);
+ if (!regs) {
+ dev_err(&pdev->dev, "cannot map device registers\n");
+ return -ENOMEM;
+ }
+
+ pcie_fw = readl(regs + PCIE_FW_A);
+ iounmap(regs);
+ /* Check if cxgb4 is the MASTER and fw is initialized */
+ if (!(pcie_fw & PCIE_FW_INIT_F) ||
+ !(pcie_fw & PCIE_FW_MASTER_VLD_F) ||
+ PCIE_FW_MASTER_G(pcie_fw) != 4) {
+ dev_warn(&pdev->dev,
+ "cxgb4 driver needs to be MASTER to support SRIOV\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* If any of the VF's is already assigned to Guest OS, then
+ * SRIOV for the same cannot be modified
+ */
+ if (current_vfs && pci_vfs_assigned(pdev)) {
+ dev_err(&pdev->dev,
+ "Cannot modify SR-IOV while VFs are assigned\n");
+ num_vfs = current_vfs;
+ return num_vfs;
+ }
+
+ /* Disable SRIOV when zero is passed.
+ * One needs to disable SRIOV before modifying it, else
+ * stack throws the below warning:
+ * " 'n' VFs already enabled. Disable before enabling 'm' VFs."
+ */
+ if (!num_vfs) {
+ pci_disable_sriov(pdev);
+ return num_vfs;
+ }
+
+ if (num_vfs != current_vfs) {
+ err = pci_enable_sriov(pdev, num_vfs);
+ if (err)
+ return err;
+ }
+ return num_vfs;
+}
+#endif
+
static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
int func, i, err, s_qpp, qpp, num_seg;
@@ -5162,11 +5220,16 @@
sriov:
#ifdef CONFIG_PCI_IOV
- if (func < ARRAY_SIZE(num_vf) && num_vf[func] > 0)
+ if (func < ARRAY_SIZE(num_vf) && num_vf[func] > 0) {
+ dev_warn(&pdev->dev,
+ "Enabling SR-IOV VFs using the num_vf module "
+ "parameter is deprecated - please use the pci sysfs "
+ "interface instead.\n");
if (pci_enable_sriov(pdev, num_vf[func]) == 0)
dev_info(&pdev->dev,
"instantiated %u virtual functions\n",
num_vf[func]);
+ }
#endif
return 0;
@@ -5259,6 +5322,9 @@
.probe = init_one,
.remove = remove_one,
.shutdown = remove_one,
+#ifdef CONFIG_PCI_IOV
+ .sriov_configure = cxgb4_iov_configure,
+#endif
.err_handler = &cxgb4_eeh,
};
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index 04fc6f6..8d9b2cb 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -937,12 +937,8 @@
{
struct port_info *pi = netdev_priv(dev);
- if (!(dev->flags & IFF_PROMISC)) {
- __dev_uc_sync(dev, cxgb4vf_mac_sync, cxgb4vf_mac_unsync);
- if (!(dev->flags & IFF_ALLMULTI))
- __dev_mc_sync(dev, cxgb4vf_mac_sync,
- cxgb4vf_mac_unsync);
- }
+ __dev_uc_sync(dev, cxgb4vf_mac_sync, cxgb4vf_mac_unsync);
+ __dev_mc_sync(dev, cxgb4vf_mac_sync, cxgb4vf_mac_unsync);
return t4vf_set_rxmode(pi->adapter, pi->viid, -1,
(dev->flags & IFF_PROMISC) != 0,
(dev->flags & IFF_ALLMULTI) != 0,
diff --git a/drivers/net/ethernet/cirrus/cs89x0.c b/drivers/net/ethernet/cirrus/cs89x0.c
index 6038304..c363b58 100644
--- a/drivers/net/ethernet/cirrus/cs89x0.c
+++ b/drivers/net/ethernet/cirrus/cs89x0.c
@@ -53,6 +53,8 @@
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/kernel.h>
#include <linux/types.h>
@@ -1895,9 +1897,17 @@
return 0;
}
+static const struct __maybe_unused of_device_id cs89x0_match[] = {
+ { .compatible = "cirrus,cs8900", },
+ { .compatible = "cirrus,cs8920", },
+ { },
+};
+MODULE_DEVICE_TABLE(of, cs89x0_match);
+
static struct platform_driver cs89x0_driver = {
.driver = {
- .name = DRV_NAME,
+ .name = DRV_NAME,
+ .of_match_table = of_match_ptr(cs89x0_match),
},
.remove = cs89x0_platform_remove,
};
diff --git a/drivers/net/phy/mdio-mux.c b/drivers/net/phy/mdio-mux.c
index dbd4ecc..963838d 100644
--- a/drivers/net/phy/mdio-mux.c
+++ b/drivers/net/phy/mdio-mux.c
@@ -115,6 +115,7 @@
goto err_parent_bus;
}
} else {
+ parent_bus_node = NULL;
parent_bus = mux_bus;
}
@@ -184,8 +185,7 @@
put_device(&pb->mii_bus->dev);
err_parent_bus:
- if (!mux_bus)
- of_node_put(parent_bus_node);
+ of_node_put(parent_bus_node);
return ret_val;
}
EXPORT_SYMBOL_GPL(mdio_mux_init);
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 8cc6bf4..4884802 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1254,6 +1254,13 @@
return -EFAULT;
}
+ err = virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun));
+ if (err) {
+ this_cpu_inc(tun->pcpu_stats->rx_frame_errors);
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
switch (tun->flags & TUN_TYPE_MASK) {
case IFF_TUN:
if (tun->flags & IFF_NO_PI) {
@@ -1280,13 +1287,6 @@
break;
}
- err = virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun));
- if (err) {
- this_cpu_inc(tun->pcpu_stats->rx_frame_errors);
- kfree_skb(skb);
- return -EINVAL;
- }
-
/* copy skb_ubuf_info for callback when skb has no error */
if (zerocopy) {
skb_shinfo(skb)->destructor_arg = msg_control;
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index b4d7469..32173aa 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -378,23 +378,37 @@
}
/* holding rtnl */
-static void vrf_rt6_release(struct net_vrf *vrf)
+static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)
{
struct rt6_info *rt6 = rtnl_dereference(vrf->rt6);
struct rt6_info *rt6_local = rtnl_dereference(vrf->rt6_local);
+ struct net *net = dev_net(dev);
+ struct dst_entry *dst;
RCU_INIT_POINTER(vrf->rt6, NULL);
RCU_INIT_POINTER(vrf->rt6_local, NULL);
synchronize_rcu();
- if (rt6)
- dst_release(&rt6->dst);
+ /* move dev in dst's to loopback so this VRF device can be deleted
+ * - based on dst_ifdown
+ */
+ if (rt6) {
+ dst = &rt6->dst;
+ dev_put(dst->dev);
+ dst->dev = net->loopback_dev;
+ dev_hold(dst->dev);
+ dst_release(dst);
+ }
if (rt6_local) {
if (rt6_local->rt6i_idev)
in6_dev_put(rt6_local->rt6i_idev);
- dst_release(&rt6_local->dst);
+ dst = &rt6_local->dst;
+ dev_put(dst->dev);
+ dst->dev = net->loopback_dev;
+ dev_hold(dst->dev);
+ dst_release(dst);
}
}
@@ -449,7 +463,7 @@
return rc;
}
#else
-static void vrf_rt6_release(struct net_vrf *vrf)
+static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)
{
}
@@ -518,20 +532,35 @@
}
/* holding rtnl */
-static void vrf_rtable_release(struct net_vrf *vrf)
+static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf)
{
struct rtable *rth = rtnl_dereference(vrf->rth);
struct rtable *rth_local = rtnl_dereference(vrf->rth_local);
+ struct net *net = dev_net(dev);
+ struct dst_entry *dst;
RCU_INIT_POINTER(vrf->rth, NULL);
RCU_INIT_POINTER(vrf->rth_local, NULL);
synchronize_rcu();
- if (rth)
- dst_release(&rth->dst);
+ /* move dev in dst's to loopback so this VRF device can be deleted
+ * - based on dst_ifdown
+ */
+ if (rth) {
+ dst = &rth->dst;
+ dev_put(dst->dev);
+ dst->dev = net->loopback_dev;
+ dev_hold(dst->dev);
+ dst_release(dst);
+ }
- if (rth_local)
- dst_release(&rth_local->dst);
+ if (rth_local) {
+ dst = &rth_local->dst;
+ dev_put(dst->dev);
+ dst->dev = net->loopback_dev;
+ dev_hold(dst->dev);
+ dst_release(dst);
+ }
}
static int vrf_rtable_create(struct net_device *dev)
@@ -633,8 +662,8 @@
struct net_device *port_dev;
struct list_head *iter;
- vrf_rtable_release(vrf);
- vrf_rt6_release(vrf);
+ vrf_rtable_release(dev, vrf);
+ vrf_rt6_release(dev, vrf);
netdev_for_each_lower_dev(dev, port_dev, iter)
vrf_del_slave(dev, port_dev);
@@ -669,7 +698,7 @@
return 0;
out_rth:
- vrf_rtable_release(vrf);
+ vrf_rtable_release(dev, vrf);
out_stats:
free_percpu(dev->dstats);
dev->dstats = NULL;
@@ -785,9 +814,63 @@
return rc;
}
+static struct rt6_info *vrf_ip6_route_lookup(struct net *net,
+ const struct net_device *dev,
+ struct flowi6 *fl6,
+ int ifindex,
+ int flags)
+{
+ struct net_vrf *vrf = netdev_priv(dev);
+ struct fib6_table *table = NULL;
+ struct rt6_info *rt6;
+
+ rcu_read_lock();
+
+ /* fib6_table does not have a refcnt and can not be freed */
+ rt6 = rcu_dereference(vrf->rt6);
+ if (likely(rt6))
+ table = rt6->rt6i_table;
+
+ rcu_read_unlock();
+
+ if (!table)
+ return NULL;
+
+ return ip6_pol_route(net, table, ifindex, fl6, flags);
+}
+
+static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev,
+ int ifindex)
+{
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct flowi6 fl6 = {
+ .daddr = iph->daddr,
+ .saddr = iph->saddr,
+ .flowlabel = ip6_flowinfo(iph),
+ .flowi6_mark = skb->mark,
+ .flowi6_proto = iph->nexthdr,
+ .flowi6_iif = ifindex,
+ };
+ struct net *net = dev_net(vrf_dev);
+ struct rt6_info *rt6;
+
+ rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex,
+ RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_IFACE);
+ if (unlikely(!rt6))
+ return;
+
+ if (unlikely(&rt6->dst == &net->ipv6.ip6_null_entry->dst))
+ return;
+
+ skb_dst_set(skb, &rt6->dst);
+}
+
static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
struct sk_buff *skb)
{
+ int orig_iif = skb->skb_iif;
+ bool need_strict;
+
/* loopback traffic; do not push through packet taps again.
* Reset pkt_type for upper layers to process skb
*/
@@ -798,8 +881,11 @@
goto out;
}
- /* if packet is NDISC keep the ingress interface */
- if (!ipv6_ndisc_frame(skb)) {
+ /* if packet is NDISC or addressed to multicast or link-local
+ * then keep the ingress interface
+ */
+ need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr);
+ if (!ipv6_ndisc_frame(skb) && !need_strict) {
skb->dev = vrf_dev;
skb->skb_iif = vrf_dev->ifindex;
@@ -810,6 +896,9 @@
IP6CB(skb)->flags |= IP6SKB_L3SLAVE;
}
+ if (need_strict)
+ vrf_ip6_input_dst(skb, vrf_dev, orig_iif);
+
out:
return skb;
}
@@ -861,13 +950,37 @@
#if IS_ENABLED(CONFIG_IPV6)
static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev,
- const struct flowi6 *fl6)
+ struct flowi6 *fl6)
{
+ bool need_strict = rt6_need_strict(&fl6->daddr);
+ struct net_vrf *vrf = netdev_priv(dev);
+ struct net *net = dev_net(dev);
struct dst_entry *dst = NULL;
+ struct rt6_info *rt;
- if (!(fl6->flowi6_flags & FLOWI_FLAG_L3MDEV_SRC)) {
- struct net_vrf *vrf = netdev_priv(dev);
- struct rt6_info *rt;
+ /* send to link-local or multicast address */
+ if (need_strict) {
+ int flags = RT6_LOOKUP_F_IFACE;
+
+ /* VRF device does not have a link-local address and
+ * sending packets to link-local or mcast addresses over
+ * a VRF device does not make sense
+ */
+ if (fl6->flowi6_oif == dev->ifindex) {
+ struct dst_entry *dst = &net->ipv6.ip6_null_entry->dst;
+
+ dst_hold(dst);
+ return dst;
+ }
+
+ if (!ipv6_addr_any(&fl6->saddr))
+ flags |= RT6_LOOKUP_F_HAS_SADDR;
+
+ rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, flags);
+ if (rt)
+ dst = &rt->dst;
+
+ } else if (!(fl6->flowi6_flags & FLOWI_FLAG_L3MDEV_SRC)) {
rcu_read_lock();
@@ -880,6 +993,10 @@
rcu_read_unlock();
}
+ /* make sure oif is set to VRF device for lookup */
+ if (!need_strict)
+ fl6->flowi6_oif = dev->ifindex;
+
return dst;
}
#endif
@@ -1011,6 +1128,20 @@
/* don't allow vrf devices to change network namespaces. */
dev->features |= NETIF_F_NETNS_LOCAL;
+
+ /* does not make sense for a VLAN to be added to a vrf device */
+ dev->features |= NETIF_F_VLAN_CHALLENGED;
+
+ /* enable offload features */
+ dev->features |= NETIF_F_GSO_SOFTWARE;
+ dev->features |= NETIF_F_RXCSUM | NETIF_F_HW_CSUM;
+ dev->features |= NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA;
+
+ dev->hw_features = dev->features;
+ dev->hw_enc_features = dev->features;
+
+ /* default to no qdisc; user can add if desired */
+ dev->priv_flags |= IFF_NO_QUEUE;
}
static int vrf_validate(struct nlattr *tb[], struct nlattr *data[])
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index e955a28..152421c 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -123,6 +123,10 @@
MLX5_CMD_OP_DRAIN_DCT = 0x712,
MLX5_CMD_OP_QUERY_DCT = 0x713,
MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION = 0x714,
+ MLX5_CMD_OP_CREATE_XRQ = 0x717,
+ MLX5_CMD_OP_DESTROY_XRQ = 0x718,
+ MLX5_CMD_OP_QUERY_XRQ = 0x719,
+ MLX5_CMD_OP_ARM_XRQ = 0x71a,
MLX5_CMD_OP_QUERY_VPORT_STATE = 0x750,
MLX5_CMD_OP_MODIFY_VPORT_STATE = 0x751,
MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT = 0x752,
@@ -139,6 +143,8 @@
MLX5_CMD_OP_ALLOC_Q_COUNTER = 0x771,
MLX5_CMD_OP_DEALLOC_Q_COUNTER = 0x772,
MLX5_CMD_OP_QUERY_Q_COUNTER = 0x773,
+ MLX5_CMD_OP_SET_RATE_LIMIT = 0x780,
+ MLX5_CMD_OP_QUERY_RATE_LIMIT = 0x781,
MLX5_CMD_OP_ALLOC_PD = 0x800,
MLX5_CMD_OP_DEALLOC_PD = 0x801,
MLX5_CMD_OP_ALLOC_UAR = 0x802,
@@ -362,7 +368,8 @@
};
struct mlx5_ifc_fte_match_set_misc_bits {
- u8 reserved_at_0[0x20];
+ u8 reserved_at_0[0x8];
+ u8 source_sqn[0x18];
u8 reserved_at_20[0x10];
u8 source_port[0x10];
@@ -508,6 +515,17 @@
u8 reserved_at_20[0x7e0];
};
+struct mlx5_ifc_qos_cap_bits {
+ u8 packet_pacing[0x1];
+ u8 reserved_0[0x1f];
+ u8 reserved_1[0x20];
+ u8 packet_pacing_max_rate[0x20];
+ u8 packet_pacing_min_rate[0x20];
+ u8 reserved_2[0x10];
+ u8 packet_pacing_rate_table_size[0x10];
+ u8 reserved_3[0x760];
+};
+
struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
u8 csum_cap[0x1];
u8 vlan_cap[0x1];
@@ -747,7 +765,8 @@
u8 out_of_seq_cnt[0x1];
u8 vport_counters[0x1];
- u8 reserved_at_182[0x4];
+ u8 retransmission_q_counters[0x1];
+ u8 reserved_at_183[0x3];
u8 max_qp_cnt[0xa];
u8 pkey_table_size[0x10];
@@ -774,7 +793,9 @@
u8 log_max_msg[0x5];
u8 reserved_at_1c8[0x4];
u8 max_tc[0x4];
- u8 reserved_at_1d0[0x6];
+ u8 reserved_at_1d0[0x1];
+ u8 dcbx[0x1];
+ u8 reserved_at_1d2[0x4];
u8 rol_s[0x1];
u8 rol_g[0x1];
u8 reserved_at_1d8[0x1];
@@ -806,7 +827,7 @@
u8 tph[0x1];
u8 rf[0x1];
u8 dct[0x1];
- u8 reserved_at_21b[0x1];
+ u8 qos[0x1];
u8 eth_net_offloads[0x1];
u8 roce[0x1];
u8 atomic[0x1];
@@ -932,7 +953,15 @@
u8 cqe_compression_timeout[0x10];
u8 cqe_compression_max_num[0x10];
- u8 reserved_at_5e0[0x220];
+ u8 reserved_at_5e0[0x10];
+ u8 tag_matching[0x1];
+ u8 rndv_offload_rc[0x1];
+ u8 rndv_offload_dc[0x1];
+ u8 log_tag_matching_list_sz[0x5];
+ u8 reserved_at_5e8[0x3];
+ u8 log_max_xrq[0x5];
+
+ u8 reserved_at_5f0[0x200];
};
enum mlx5_flow_destination_type {
@@ -1970,7 +1999,7 @@
u8 reserved_at_560[0x5];
u8 rq_type[0x3];
- u8 srqn_rmpn[0x18];
+ u8 srqn_rmpn_xrqn[0x18];
u8 reserved_at_580[0x8];
u8 rmsn[0x18];
@@ -2021,6 +2050,7 @@
struct mlx5_ifc_flow_table_eswitch_cap_bits flow_table_eswitch_cap;
struct mlx5_ifc_e_switch_cap_bits e_switch_cap;
struct mlx5_ifc_vector_calc_cap_bits vector_calc_cap;
+ struct mlx5_ifc_qos_cap_bits qos_cap;
u8 reserved_at_0[0x8000];
};
@@ -2247,8 +2277,9 @@
u8 reserved_at_40[0x8];
u8 cqn[0x18];
- u8 reserved_at_60[0xa0];
+ u8 reserved_at_60[0x90];
+ u8 packet_pacing_rate_limit_index[0x10];
u8 tis_lst_sz[0x10];
u8 reserved_at_110[0x10];
@@ -2596,7 +2627,7 @@
u8 reserved_at_98[0x8];
u8 reserved_at_a0[0x8];
- u8 srqn[0x18];
+ u8 srqn_xrqn[0x18];
u8 reserved_at_c0[0x8];
u8 pd[0x18];
@@ -2648,6 +2679,7 @@
enum {
MLX5_CQ_PERIOD_MODE_START_FROM_EQE = 0x0,
MLX5_CQ_PERIOD_MODE_START_FROM_CQE = 0x1,
+ MLX5_CQ_PERIOD_NUM_MODES
};
struct mlx5_ifc_cqc_bits {
@@ -2725,6 +2757,54 @@
u8 vsd_contd_psid[16][0x8];
};
+enum {
+ MLX5_XRQC_STATE_GOOD = 0x0,
+ MLX5_XRQC_STATE_ERROR = 0x1,
+};
+
+enum {
+ MLX5_XRQC_TOPOLOGY_NO_SPECIAL_TOPOLOGY = 0x0,
+ MLX5_XRQC_TOPOLOGY_TAG_MATCHING = 0x1,
+};
+
+enum {
+ MLX5_XRQC_OFFLOAD_RNDV = 0x1,
+};
+
+struct mlx5_ifc_tag_matching_topology_context_bits {
+ u8 log_matching_list_sz[0x4];
+ u8 reserved_at_4[0xc];
+ u8 append_next_index[0x10];
+
+ u8 sw_phase_cnt[0x10];
+ u8 hw_phase_cnt[0x10];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_xrqc_bits {
+ u8 state[0x4];
+ u8 rlkey[0x1];
+ u8 reserved_at_5[0xf];
+ u8 topology[0x4];
+ u8 reserved_at_18[0x4];
+ u8 offload[0x4];
+
+ u8 reserved_at_20[0x8];
+ u8 user_index[0x18];
+
+ u8 reserved_at_40[0x8];
+ u8 cqn[0x18];
+
+ u8 reserved_at_60[0xa0];
+
+ struct mlx5_ifc_tag_matching_topology_context_bits tag_matching_topology_context;
+
+ u8 reserved_at_180[0x180];
+
+ struct mlx5_ifc_wq_bits wq;
+};
+
union mlx5_ifc_modify_field_select_resize_field_select_auto_bits {
struct mlx5_ifc_modify_field_select_bits modify_field_select;
struct mlx5_ifc_resize_field_select_bits resize_field_select;
@@ -3147,6 +3227,30 @@
u8 reserved_at_800[0x80];
};
+struct mlx5_ifc_query_xrq_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+
+ struct mlx5_ifc_xrqc_bits xrq_context;
+};
+
+struct mlx5_ifc_query_xrq_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x8];
+ u8 xrqn[0x18];
+
+ u8 reserved_at_60[0x20];
+};
+
struct mlx5_ifc_query_xrc_srq_out_bits {
u8 status[0x8];
u8 reserved_at_8[0x18];
@@ -3550,7 +3654,27 @@
u8 out_of_sequence[0x20];
- u8 reserved_at_1e0[0x620];
+ u8 reserved_at_1e0[0x20];
+
+ u8 duplicate_request[0x20];
+
+ u8 reserved_at_220[0x20];
+
+ u8 rnr_nak_retry_err[0x20];
+
+ u8 reserved_at_260[0x20];
+
+ u8 packet_seq_err[0x20];
+
+ u8 reserved_at_2a0[0x20];
+
+ u8 implied_nak_seq_err[0x20];
+
+ u8 reserved_at_2e0[0x20];
+
+ u8 local_ack_timeout_err[0x20];
+
+ u8 reserved_at_320[0x4e0];
};
struct mlx5_ifc_query_q_counter_in_bits {
@@ -5004,6 +5128,28 @@
u8 multicast_gid[16][0x8];
};
+struct mlx5_ifc_destroy_xrq_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_destroy_xrq_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x8];
+ u8 xrqn[0x18];
+
+ u8 reserved_at_60[0x20];
+};
+
struct mlx5_ifc_destroy_xrc_srq_out_bits {
u8 status[0x8];
u8 reserved_at_8[0x18];
@@ -5589,6 +5735,30 @@
u8 reserved_at_60[0x20];
};
+struct mlx5_ifc_create_xrq_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x8];
+ u8 xrqn[0x18];
+
+ u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_create_xrq_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x40];
+
+ struct mlx5_ifc_xrqc_bits xrq_context;
+};
+
struct mlx5_ifc_create_xrc_srq_out_bits {
u8 status[0x8];
u8 reserved_at_8[0x18];
@@ -6130,6 +6300,29 @@
u8 multicast_gid[16][0x8];
};
+struct mlx5_ifc_arm_xrq_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_arm_xrq_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x8];
+ u8 xrqn[0x18];
+
+ u8 reserved_at_60[0x10];
+ u8 lwm[0x10];
+};
+
struct mlx5_ifc_arm_xrc_srq_out_bits {
u8 status[0x8];
u8 reserved_at_8[0x18];
@@ -6167,7 +6360,8 @@
};
enum {
- MLX5_ARM_RQ_IN_OP_MOD_SRQ_ = 0x1,
+ MLX5_ARM_RQ_IN_OP_MOD_SRQ = 0x1,
+ MLX5_ARM_RQ_IN_OP_MOD_XRQ = 0x2,
};
struct mlx5_ifc_arm_rq_in_bits {
@@ -6360,6 +6554,30 @@
u8 vxlan_udp_port[0x10];
};
+struct mlx5_ifc_set_rate_limit_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_set_rate_limit_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x10];
+ u8 rate_limit_index[0x10];
+
+ u8 reserved_at_60[0x20];
+
+ u8 rate_limit[0x20];
+};
+
struct mlx5_ifc_access_register_out_bits {
u8 status[0x8];
u8 reserved_at_8[0x18];
@@ -6484,12 +6702,15 @@
};
struct mlx5_ifc_ptys_reg_bits {
- u8 reserved_at_0[0x8];
+ u8 an_disable_cap[0x1];
+ u8 an_disable_admin[0x1];
+ u8 reserved_at_2[0x6];
u8 local_port[0x8];
u8 reserved_at_10[0xd];
u8 proto_mask[0x3];
- u8 reserved_at_20[0x40];
+ u8 an_status[0x4];
+ u8 reserved_at_24[0x3c];
u8 eth_proto_capability[0x20];
@@ -7450,4 +7671,34 @@
u8 dword_11[0x20];
};
+struct mlx5_ifc_dcbx_param_bits {
+ u8 dcbx_cee_cap[0x1];
+ u8 dcbx_ieee_cap[0x1];
+ u8 dcbx_standby_cap[0x1];
+ u8 reserved_at_0[0x5];
+ u8 port_number[0x8];
+ u8 reserved_at_10[0xa];
+ u8 max_application_table_size[6];
+ u8 reserved_at_20[0x15];
+ u8 version_oper[0x3];
+ u8 reserved_at_38[5];
+ u8 version_admin[0x3];
+ u8 willing_admin[0x1];
+ u8 reserved_at_41[0x3];
+ u8 pfc_cap_oper[0x4];
+ u8 reserved_at_48[0x4];
+ u8 pfc_cap_admin[0x4];
+ u8 reserved_at_50[0x4];
+ u8 num_of_tc_oper[0x4];
+ u8 reserved_at_58[0x4];
+ u8 num_of_tc_admin[0x4];
+ u8 remote_willing[0x1];
+ u8 reserved_at_61[3];
+ u8 remote_pfc_cap[4];
+ u8 reserved_at_68[0x14];
+ u8 remote_num_of_tc[0x4];
+ u8 reserved_at_80[0x18];
+ u8 error[0x8];
+ u8 reserved_at_a0[0x160];
+};
#endif /* MLX5_IFC_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d101e4d..890158e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1456,6 +1456,8 @@
* @netdev_ops: Includes several pointers to callbacks,
* if one wants to override the ndo_*() functions
* @ethtool_ops: Management operations
+ * @ndisc_ops: Includes callbacks for different IPv6 neighbour
+ * discovery handling. Necessary for e.g. 6LoWPAN.
* @header_ops: Includes callbacks for creating,parsing,caching,etc
* of Layer 2 headers.
*
@@ -1483,8 +1485,7 @@
* @perm_addr: Permanent hw address
* @addr_assign_type: Hw address assignment type
* @addr_len: Hardware address length
- * @neigh_priv_len; Used in neigh_alloc(),
- * initialized only in atm/clip.c
+ * @neigh_priv_len: Used in neigh_alloc()
* @dev_id: Used to differentiate devices that share
* the same link layer address
* @dev_port: Used to differentiate devices that share
@@ -1673,6 +1674,9 @@
#ifdef CONFIG_NET_L3_MASTER_DEV
const struct l3mdev_ops *l3mdev_ops;
#endif
+#if IS_ENABLED(CONFIG_IPV6)
+ const struct ndisc_ops *ndisc_ops;
+#endif
const struct header_ops *header_ops;
diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
new file mode 100644
index 0000000..562a65e
--- /dev/null
+++ b/include/linux/ptr_ring.h
@@ -0,0 +1,393 @@
+/*
+ * Definitions for the 'struct ptr_ring' datastructure.
+ *
+ * Author:
+ * Michael S. Tsirkin <mst@redhat.com>
+ *
+ * Copyright (C) 2016 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This is a limited-size FIFO maintaining pointers in FIFO order, with
+ * one CPU producing entries and another consuming entries from a FIFO.
+ *
+ * This implementation tries to minimize cache-contention when there is a
+ * single producer and a single consumer CPU.
+ */
+
+#ifndef _LINUX_PTR_RING_H
+#define _LINUX_PTR_RING_H 1
+
+#ifdef __KERNEL__
+#include <linux/spinlock.h>
+#include <linux/cache.h>
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <linux/cache.h>
+#include <linux/slab.h>
+#include <asm/errno.h>
+#endif
+
+struct ptr_ring {
+ int producer ____cacheline_aligned_in_smp;
+ spinlock_t producer_lock;
+ int consumer ____cacheline_aligned_in_smp;
+ spinlock_t consumer_lock;
+ /* Shared consumer/producer data */
+ /* Read-only by both the producer and the consumer */
+ int size ____cacheline_aligned_in_smp; /* max entries in queue */
+ void **queue;
+};
+
+/* Note: callers invoking this in a loop must use a compiler barrier,
+ * for example cpu_relax(). If ring is ever resized, callers must hold
+ * producer_lock - see e.g. ptr_ring_full. Otherwise, if callers don't hold
+ * producer_lock, the next call to __ptr_ring_produce may fail.
+ */
+static inline bool __ptr_ring_full(struct ptr_ring *r)
+{
+ return r->queue[r->producer];
+}
+
+static inline bool ptr_ring_full(struct ptr_ring *r)
+{
+ bool ret;
+
+ spin_lock(&r->producer_lock);
+ ret = __ptr_ring_full(r);
+ spin_unlock(&r->producer_lock);
+
+ return ret;
+}
+
+static inline bool ptr_ring_full_irq(struct ptr_ring *r)
+{
+ bool ret;
+
+ spin_lock_irq(&r->producer_lock);
+ ret = __ptr_ring_full(r);
+ spin_unlock_irq(&r->producer_lock);
+
+ return ret;
+}
+
+static inline bool ptr_ring_full_any(struct ptr_ring *r)
+{
+ unsigned long flags;
+ bool ret;
+
+ spin_lock_irqsave(&r->producer_lock, flags);
+ ret = __ptr_ring_full(r);
+ spin_unlock_irqrestore(&r->producer_lock, flags);
+
+ return ret;
+}
+
+static inline bool ptr_ring_full_bh(struct ptr_ring *r)
+{
+ bool ret;
+
+ spin_lock_bh(&r->producer_lock);
+ ret = __ptr_ring_full(r);
+ spin_unlock_bh(&r->producer_lock);
+
+ return ret;
+}
+
+/* Note: callers invoking this in a loop must use a compiler barrier,
+ * for example cpu_relax(). Callers must hold producer_lock.
+ */
+static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr)
+{
+ if (r->queue[r->producer])
+ return -ENOSPC;
+
+ r->queue[r->producer++] = ptr;
+ if (unlikely(r->producer >= r->size))
+ r->producer = 0;
+ return 0;
+}
+
+static inline int ptr_ring_produce(struct ptr_ring *r, void *ptr)
+{
+ int ret;
+
+ spin_lock(&r->producer_lock);
+ ret = __ptr_ring_produce(r, ptr);
+ spin_unlock(&r->producer_lock);
+
+ return ret;
+}
+
+static inline int ptr_ring_produce_irq(struct ptr_ring *r, void *ptr)
+{
+ int ret;
+
+ spin_lock_irq(&r->producer_lock);
+ ret = __ptr_ring_produce(r, ptr);
+ spin_unlock_irq(&r->producer_lock);
+
+ return ret;
+}
+
+static inline int ptr_ring_produce_any(struct ptr_ring *r, void *ptr)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&r->producer_lock, flags);
+ ret = __ptr_ring_produce(r, ptr);
+ spin_unlock_irqrestore(&r->producer_lock, flags);
+
+ return ret;
+}
+
+static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr)
+{
+ int ret;
+
+ spin_lock_bh(&r->producer_lock);
+ ret = __ptr_ring_produce(r, ptr);
+ spin_unlock_bh(&r->producer_lock);
+
+ return ret;
+}
+
+/* Note: callers invoking this in a loop must use a compiler barrier,
+ * for example cpu_relax(). Callers must take consumer_lock
+ * if they dereference the pointer - see e.g. PTR_RING_PEEK_CALL.
+ * If ring is never resized, and if the pointer is merely
+ * tested, there's no need to take the lock - see e.g. __ptr_ring_empty.
+ */
+static inline void *__ptr_ring_peek(struct ptr_ring *r)
+{
+ return r->queue[r->consumer];
+}
+
+/* Note: callers invoking this in a loop must use a compiler barrier,
+ * for example cpu_relax(). Callers must take consumer_lock
+ * if the ring is ever resized - see e.g. ptr_ring_empty.
+ */
+static inline bool __ptr_ring_empty(struct ptr_ring *r)
+{
+ return !__ptr_ring_peek(r);
+}
+
+static inline bool ptr_ring_empty(struct ptr_ring *r)
+{
+ bool ret;
+
+ spin_lock(&r->consumer_lock);
+ ret = __ptr_ring_empty(r);
+ spin_unlock(&r->consumer_lock);
+
+ return ret;
+}
+
+static inline bool ptr_ring_empty_irq(struct ptr_ring *r)
+{
+ bool ret;
+
+ spin_lock_irq(&r->consumer_lock);
+ ret = __ptr_ring_empty(r);
+ spin_unlock_irq(&r->consumer_lock);
+
+ return ret;
+}
+
+static inline bool ptr_ring_empty_any(struct ptr_ring *r)
+{
+ unsigned long flags;
+ bool ret;
+
+ spin_lock_irqsave(&r->consumer_lock, flags);
+ ret = __ptr_ring_empty(r);
+ spin_unlock_irqrestore(&r->consumer_lock, flags);
+
+ return ret;
+}
+
+static inline bool ptr_ring_empty_bh(struct ptr_ring *r)
+{
+ bool ret;
+
+ spin_lock_bh(&r->consumer_lock);
+ ret = __ptr_ring_empty(r);
+ spin_unlock_bh(&r->consumer_lock);
+
+ return ret;
+}
+
+/* Must only be called after __ptr_ring_peek returned !NULL */
+static inline void __ptr_ring_discard_one(struct ptr_ring *r)
+{
+ r->queue[r->consumer++] = NULL;
+ if (unlikely(r->consumer >= r->size))
+ r->consumer = 0;
+}
+
+static inline void *__ptr_ring_consume(struct ptr_ring *r)
+{
+ void *ptr;
+
+ ptr = __ptr_ring_peek(r);
+ if (ptr)
+ __ptr_ring_discard_one(r);
+
+ return ptr;
+}
+
+static inline void *ptr_ring_consume(struct ptr_ring *r)
+{
+ void *ptr;
+
+ spin_lock(&r->consumer_lock);
+ ptr = __ptr_ring_consume(r);
+ spin_unlock(&r->consumer_lock);
+
+ return ptr;
+}
+
+static inline void *ptr_ring_consume_irq(struct ptr_ring *r)
+{
+ void *ptr;
+
+ spin_lock_irq(&r->consumer_lock);
+ ptr = __ptr_ring_consume(r);
+ spin_unlock_irq(&r->consumer_lock);
+
+ return ptr;
+}
+
+static inline void *ptr_ring_consume_any(struct ptr_ring *r)
+{
+ unsigned long flags;
+ void *ptr;
+
+ spin_lock_irqsave(&r->consumer_lock, flags);
+ ptr = __ptr_ring_consume(r);
+ spin_unlock_irqrestore(&r->consumer_lock, flags);
+
+ return ptr;
+}
+
+static inline void *ptr_ring_consume_bh(struct ptr_ring *r)
+{
+ void *ptr;
+
+ spin_lock_bh(&r->consumer_lock);
+ ptr = __ptr_ring_consume(r);
+ spin_unlock_bh(&r->consumer_lock);
+
+ return ptr;
+}
+
+/* Cast to structure type and call a function without discarding from FIFO.
+ * Function must return a value.
+ * Callers must take consumer_lock.
+ */
+#define __PTR_RING_PEEK_CALL(r, f) ((f)(__ptr_ring_peek(r)))
+
+#define PTR_RING_PEEK_CALL(r, f) ({ \
+ typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
+ \
+ spin_lock(&(r)->consumer_lock); \
+ __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
+ spin_unlock(&(r)->consumer_lock); \
+ __PTR_RING_PEEK_CALL_v; \
+})
+
+#define PTR_RING_PEEK_CALL_IRQ(r, f) ({ \
+ typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
+ \
+ spin_lock_irq(&(r)->consumer_lock); \
+ __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
+ spin_unlock_irq(&(r)->consumer_lock); \
+ __PTR_RING_PEEK_CALL_v; \
+})
+
+#define PTR_RING_PEEK_CALL_BH(r, f) ({ \
+ typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
+ \
+ spin_lock_bh(&(r)->consumer_lock); \
+ __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
+ spin_unlock_bh(&(r)->consumer_lock); \
+ __PTR_RING_PEEK_CALL_v; \
+})
+
+#define PTR_RING_PEEK_CALL_ANY(r, f) ({ \
+ typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
+ unsigned long __PTR_RING_PEEK_CALL_f;\
+ \
+ spin_lock_irqsave(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \
+ __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
+ spin_unlock_irqrestore(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \
+ __PTR_RING_PEEK_CALL_v; \
+})
+
+static inline void **__ptr_ring_init_queue_alloc(int size, gfp_t gfp)
+{
+ return kzalloc(ALIGN(size * sizeof(void *), SMP_CACHE_BYTES), gfp);
+}
+
+static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp)
+{
+ r->queue = __ptr_ring_init_queue_alloc(size, gfp);
+ if (!r->queue)
+ return -ENOMEM;
+
+ r->size = size;
+ r->producer = r->consumer = 0;
+ spin_lock_init(&r->producer_lock);
+ spin_lock_init(&r->consumer_lock);
+
+ return 0;
+}
+
+static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp,
+ void (*destroy)(void *))
+{
+ unsigned long flags;
+ int producer = 0;
+ void **queue = __ptr_ring_init_queue_alloc(size, gfp);
+ void **old;
+ void *ptr;
+
+ if (!queue)
+ return -ENOMEM;
+
+ spin_lock_irqsave(&(r)->producer_lock, flags);
+
+ while ((ptr = ptr_ring_consume(r)))
+ if (producer < size)
+ queue[producer++] = ptr;
+ else if (destroy)
+ destroy(ptr);
+
+ r->size = size;
+ r->producer = producer;
+ r->consumer = 0;
+ old = r->queue;
+ r->queue = queue;
+
+ spin_unlock_irqrestore(&(r)->producer_lock, flags);
+
+ kfree(old);
+
+ return 0;
+}
+
+static inline void ptr_ring_cleanup(struct ptr_ring *r, void (*destroy)(void *))
+{
+ void *ptr;
+
+ if (destroy)
+ while ((ptr = ptr_ring_consume(r)))
+ destroy(ptr);
+ kfree(r->queue);
+}
+
+#endif /* _LINUX_PTR_RING_H */
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index c006cc9..2daece8 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -89,8 +89,9 @@
void net_dec_egress_queue(void);
#endif
-extern void rtnetlink_init(void);
-extern void __rtnl_unlock(void);
+void rtnetlink_init(void);
+void __rtnl_unlock(void);
+void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail);
#define ASSERT_RTNL() do { \
if (unlikely(!rtnl_is_locked())) { \
diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h
new file mode 100644
index 0000000..678bfbf
--- /dev/null
+++ b/include/linux/skb_array.h
@@ -0,0 +1,169 @@
+/*
+ * Definitions for the 'struct skb_array' datastructure.
+ *
+ * Author:
+ * Michael S. Tsirkin <mst@redhat.com>
+ *
+ * Copyright (C) 2016 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * Limited-size FIFO of skbs. Can be used more or less whenever
+ * sk_buff_head can be used, except you need to know the queue size in
+ * advance.
+ * Implemented as a type-safe wrapper around ptr_ring.
+ */
+
+#ifndef _LINUX_SKB_ARRAY_H
+#define _LINUX_SKB_ARRAY_H 1
+
+#ifdef __KERNEL__
+#include <linux/ptr_ring.h>
+#include <linux/skbuff.h>
+#include <linux/if_vlan.h>
+#endif
+
+struct skb_array {
+ struct ptr_ring ring;
+};
+
+/* Might be slightly faster than skb_array_full below, but callers invoking
+ * this in a loop must use a compiler barrier, for example cpu_relax().
+ */
+static inline bool __skb_array_full(struct skb_array *a)
+{
+ return __ptr_ring_full(&a->ring);
+}
+
+static inline bool skb_array_full(struct skb_array *a)
+{
+ return ptr_ring_full(&a->ring);
+}
+
+static inline int skb_array_produce(struct skb_array *a, struct sk_buff *skb)
+{
+ return ptr_ring_produce(&a->ring, skb);
+}
+
+static inline int skb_array_produce_irq(struct skb_array *a, struct sk_buff *skb)
+{
+ return ptr_ring_produce_irq(&a->ring, skb);
+}
+
+static inline int skb_array_produce_bh(struct skb_array *a, struct sk_buff *skb)
+{
+ return ptr_ring_produce_bh(&a->ring, skb);
+}
+
+static inline int skb_array_produce_any(struct skb_array *a, struct sk_buff *skb)
+{
+ return ptr_ring_produce_any(&a->ring, skb);
+}
+
+/* Might be slightly faster than skb_array_empty below, but only safe if the
+ * array is never resized. Also, callers invoking this in a loop must take care
+ * to use a compiler barrier, for example cpu_relax().
+ */
+static inline bool __skb_array_empty(struct skb_array *a)
+{
+ return !__ptr_ring_peek(&a->ring);
+}
+
+static inline bool skb_array_empty(struct skb_array *a)
+{
+ return ptr_ring_empty(&a->ring);
+}
+
+static inline bool skb_array_empty_bh(struct skb_array *a)
+{
+ return ptr_ring_empty_bh(&a->ring);
+}
+
+static inline bool skb_array_empty_irq(struct skb_array *a)
+{
+ return ptr_ring_empty_irq(&a->ring);
+}
+
+static inline bool skb_array_empty_any(struct skb_array *a)
+{
+ return ptr_ring_empty_any(&a->ring);
+}
+
+static inline struct sk_buff *skb_array_consume(struct skb_array *a)
+{
+ return ptr_ring_consume(&a->ring);
+}
+
+static inline struct sk_buff *skb_array_consume_irq(struct skb_array *a)
+{
+ return ptr_ring_consume_irq(&a->ring);
+}
+
+static inline struct sk_buff *skb_array_consume_any(struct skb_array *a)
+{
+ return ptr_ring_consume_any(&a->ring);
+}
+
+static inline struct sk_buff *skb_array_consume_bh(struct skb_array *a)
+{
+ return ptr_ring_consume_bh(&a->ring);
+}
+
+static inline int __skb_array_len_with_tag(struct sk_buff *skb)
+{
+ if (likely(skb)) {
+ int len = skb->len;
+
+ if (skb_vlan_tag_present(skb))
+ len += VLAN_HLEN;
+
+ return len;
+ } else {
+ return 0;
+ }
+}
+
+static inline int skb_array_peek_len(struct skb_array *a)
+{
+ return PTR_RING_PEEK_CALL(&a->ring, __skb_array_len_with_tag);
+}
+
+static inline int skb_array_peek_len_irq(struct skb_array *a)
+{
+ return PTR_RING_PEEK_CALL_IRQ(&a->ring, __skb_array_len_with_tag);
+}
+
+static inline int skb_array_peek_len_bh(struct skb_array *a)
+{
+ return PTR_RING_PEEK_CALL_BH(&a->ring, __skb_array_len_with_tag);
+}
+
+static inline int skb_array_peek_len_any(struct skb_array *a)
+{
+ return PTR_RING_PEEK_CALL_ANY(&a->ring, __skb_array_len_with_tag);
+}
+
+static inline int skb_array_init(struct skb_array *a, int size, gfp_t gfp)
+{
+ return ptr_ring_init(&a->ring, size, gfp);
+}
+
+void __skb_array_destroy_skb(void *ptr)
+{
+ kfree_skb(ptr);
+}
+
+int skb_array_resize(struct skb_array *a, int size, gfp_t gfp)
+{
+ return ptr_ring_resize(&a->ring, size, gfp, __skb_array_destroy_skb);
+}
+
+static inline void skb_array_cleanup(struct skb_array *a)
+{
+ ptr_ring_cleanup(&a->ring, __skb_array_destroy_skb);
+}
+
+#endif /* _LINUX_SKB_ARRAY_H */
diff --git a/include/net/6lowpan.h b/include/net/6lowpan.h
index da84cf9..5ab4c99 100644
--- a/include/net/6lowpan.h
+++ b/include/net/6lowpan.h
@@ -141,6 +141,16 @@
u8 priv[0] __aligned(sizeof(void *));
};
+struct lowpan_802154_neigh {
+ __le16 short_addr;
+};
+
+static inline
+struct lowpan_802154_neigh *lowpan_802154_neigh(void *neigh_priv)
+{
+ return neigh_priv;
+}
+
static inline
struct lowpan_dev *lowpan_dev(const struct net_device *dev)
{
@@ -244,6 +254,12 @@
return false;
}
+static inline bool lowpan_802154_is_valid_src_short_addr(__le16 addr)
+{
+ /* First bit of addr is multicast, reserved or 802.15.4 specific */
+ return !(addr & cpu_to_le16(0x8000));
+}
+
static inline void lowpan_push_hc_data(u8 **hc_ptr, const void *data,
const size_t len)
{
diff --git a/include/net/act_api.h b/include/net/act_api.h
index db218a1..fb82b5b 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -155,8 +155,8 @@
struct tc_action *a);
int tcf_hash_search(struct tc_action_net *tn, struct tc_action *a, u32 index);
u32 tcf_hash_new_index(struct tc_action_net *tn);
-int tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action *a,
- int bind);
+bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action *a,
+ int bind);
int tcf_hash_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
struct tc_action *a, int size, int bind, bool cpustats);
void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est);
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 730d856..9826d3a 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -94,6 +94,16 @@
void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr);
void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr);
+void addrconf_add_linklocal(struct inet6_dev *idev,
+ const struct in6_addr *addr, u32 flags);
+
+int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
+ const struct prefix_info *pinfo,
+ struct inet6_dev *in6_dev,
+ const struct in6_addr *addr, int addr_type,
+ u32 addr_flags, bool sllao, bool tokenized,
+ __u32 valid_lft, u32 prefered_lft);
+
static inline int addrconf_ifid_eui48(u8 *eui, struct net_device *dev)
{
if (dev->addr_len != ETH_ALEN)
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 54c7794..f55bf3d 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -76,6 +76,8 @@
struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
int flags);
+struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
+ int ifindex, struct flowi6 *fl6, int flags);
int ip6_route_init(void);
void ip6_route_cleanup(void);
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index dbf4444..9222678 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -132,6 +132,7 @@
int ip_tnl_net_id;
struct gro_cells gro_cells;
bool collect_md;
+ bool ignore_df;
};
#define TUNNEL_CSUM __cpu_to_be16(0x01)
diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
index 34f33eb..f8a416e 100644
--- a/include/net/l3mdev.h
+++ b/include/net/l3mdev.h
@@ -38,7 +38,7 @@
/* IPv6 ops */
struct dst_entry * (*l3mdev_get_rt6_dst)(const struct net_device *dev,
- const struct flowi6 *fl6);
+ struct flowi6 *fl6);
};
#ifdef CONFIG_NET_L3_MASTER_DEV
@@ -139,7 +139,7 @@
int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4);
-struct dst_entry *l3mdev_get_rt6_dst(struct net *net, const struct flowi6 *fl6);
+struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6);
static inline
struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto)
@@ -225,7 +225,7 @@
}
static inline
-struct dst_entry *l3mdev_get_rt6_dst(struct net *net, const struct flowi6 *fl6)
+struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6)
{
return NULL;
}
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index 2d8edaa..be1fe228 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -35,6 +35,7 @@
ND_OPT_ROUTE_INFO = 24, /* RFC4191 */
ND_OPT_RDNSS = 25, /* RFC5006 */
ND_OPT_DNSSL = 31, /* RFC6106 */
+ ND_OPT_6CO = 34, /* RFC6775 */
__ND_OPT_MAX
};
@@ -53,11 +54,21 @@
#include <net/neighbour.h>
+/* Set to 3 to get tracing... */
+#define ND_DEBUG 1
+
+#define ND_PRINTK(val, level, fmt, ...) \
+do { \
+ if (val <= ND_DEBUG) \
+ net_##level##_ratelimited(fmt, ##__VA_ARGS__); \
+} while (0)
+
struct ctl_table;
struct inet6_dev;
struct net_device;
struct net_proto_family;
struct sk_buff;
+struct prefix_info;
extern struct neigh_table nd_tbl;
@@ -99,20 +110,201 @@
#endif
struct nd_opt_hdr *nd_useropts;
struct nd_opt_hdr *nd_useropts_end;
+#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN)
+ struct nd_opt_hdr *nd_802154_opt_array[ND_OPT_TARGET_LL_ADDR + 1];
+#endif
};
-#define nd_opts_src_lladdr nd_opt_array[ND_OPT_SOURCE_LL_ADDR]
-#define nd_opts_tgt_lladdr nd_opt_array[ND_OPT_TARGET_LL_ADDR]
-#define nd_opts_pi nd_opt_array[ND_OPT_PREFIX_INFO]
-#define nd_opts_pi_end nd_opt_array[__ND_OPT_PREFIX_INFO_END]
-#define nd_opts_rh nd_opt_array[ND_OPT_REDIRECT_HDR]
-#define nd_opts_mtu nd_opt_array[ND_OPT_MTU]
+#define nd_opts_src_lladdr nd_opt_array[ND_OPT_SOURCE_LL_ADDR]
+#define nd_opts_tgt_lladdr nd_opt_array[ND_OPT_TARGET_LL_ADDR]
+#define nd_opts_pi nd_opt_array[ND_OPT_PREFIX_INFO]
+#define nd_opts_pi_end nd_opt_array[__ND_OPT_PREFIX_INFO_END]
+#define nd_opts_rh nd_opt_array[ND_OPT_REDIRECT_HDR]
+#define nd_opts_mtu nd_opt_array[ND_OPT_MTU]
+#define nd_802154_opts_src_lladdr nd_802154_opt_array[ND_OPT_SOURCE_LL_ADDR]
+#define nd_802154_opts_tgt_lladdr nd_802154_opt_array[ND_OPT_TARGET_LL_ADDR]
#define NDISC_OPT_SPACE(len) (((len)+2+7)&~7)
-struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
+struct ndisc_options *ndisc_parse_options(const struct net_device *dev,
+ u8 *opt, int opt_len,
struct ndisc_options *ndopts);
+void __ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data,
+ int data_len, int pad);
+
+#define NDISC_OPS_REDIRECT_DATA_SPACE 2
+
+/*
+ * This structure defines the hooks for IPv6 neighbour discovery.
+ * The following hooks can be defined; unless noted otherwise, they are
+ * optional and can be filled with a null pointer.
+ *
+ * int (*is_useropt)(u8 nd_opt_type):
+ * This function is called when IPv6 decide RA userspace options. if
+ * this function returns 1 then the option given by nd_opt_type will
+ * be handled as userspace option additional to the IPv6 options.
+ *
+ * int (*parse_options)(const struct net_device *dev,
+ * struct nd_opt_hdr *nd_opt,
+ * struct ndisc_options *ndopts):
+ * This function is called while parsing ndisc ops and put each position
+ * as pointer into ndopts. If this function return unequal 0, then this
+ * function took care about the ndisc option, if 0 then the IPv6 ndisc
+ * option parser will take care about that option.
+ *
+ * void (*update)(const struct net_device *dev, struct neighbour *n,
+ * u32 flags, u8 icmp6_type,
+ * const struct ndisc_options *ndopts):
+ * This function is called when IPv6 ndisc updates the neighbour cache
+ * entry. Additional options which can be updated may be previously
+ * parsed by parse_opts callback and accessible over ndopts parameter.
+ *
+ * int (*opt_addr_space)(const struct net_device *dev, u8 icmp6_type,
+ * struct neighbour *neigh, u8 *ha_buf,
+ * u8 **ha):
+ * This function is called when the necessary option space will be
+ * calculated before allocating a skb. The parameters neigh, ha_buf
+ * abd ha are available on NDISC_REDIRECT messages only.
+ *
+ * void (*fill_addr_option)(const struct net_device *dev,
+ * struct sk_buff *skb, u8 icmp6_type,
+ * const u8 *ha):
+ * This function is called when the skb will finally fill the option
+ * fields inside skb. NOTE: this callback should fill the option
+ * fields to the skb which are previously indicated by opt_space
+ * parameter. That means the decision to add such option should
+ * not lost between these two callbacks, e.g. protected by interface
+ * up state.
+ *
+ * void (*prefix_rcv_add_addr)(struct net *net, struct net_device *dev,
+ * const struct prefix_info *pinfo,
+ * struct inet6_dev *in6_dev,
+ * struct in6_addr *addr,
+ * int addr_type, u32 addr_flags,
+ * bool sllao, bool tokenized,
+ * __u32 valid_lft, u32 prefered_lft,
+ * bool dev_addr_generated):
+ * This function is called when a RA messages is received with valid
+ * PIO option fields and an IPv6 address will be added to the interface
+ * for autoconfiguration. The parameter dev_addr_generated reports about
+ * if the address was based on dev->dev_addr or not. This can be used
+ * to add a second address if link-layer operates with two link layer
+ * addresses. E.g. 802.15.4 6LoWPAN.
+ */
+struct ndisc_ops {
+ int (*is_useropt)(u8 nd_opt_type);
+ int (*parse_options)(const struct net_device *dev,
+ struct nd_opt_hdr *nd_opt,
+ struct ndisc_options *ndopts);
+ void (*update)(const struct net_device *dev, struct neighbour *n,
+ u32 flags, u8 icmp6_type,
+ const struct ndisc_options *ndopts);
+ int (*opt_addr_space)(const struct net_device *dev, u8 icmp6_type,
+ struct neighbour *neigh, u8 *ha_buf,
+ u8 **ha);
+ void (*fill_addr_option)(const struct net_device *dev,
+ struct sk_buff *skb, u8 icmp6_type,
+ const u8 *ha);
+ void (*prefix_rcv_add_addr)(struct net *net, struct net_device *dev,
+ const struct prefix_info *pinfo,
+ struct inet6_dev *in6_dev,
+ struct in6_addr *addr,
+ int addr_type, u32 addr_flags,
+ bool sllao, bool tokenized,
+ __u32 valid_lft, u32 prefered_lft,
+ bool dev_addr_generated);
+};
+
+#if IS_ENABLED(CONFIG_IPV6)
+static inline int ndisc_ops_is_useropt(const struct net_device *dev,
+ u8 nd_opt_type)
+{
+ if (dev->ndisc_ops && dev->ndisc_ops->is_useropt)
+ return dev->ndisc_ops->is_useropt(nd_opt_type);
+ else
+ return 0;
+}
+
+static inline int ndisc_ops_parse_options(const struct net_device *dev,
+ struct nd_opt_hdr *nd_opt,
+ struct ndisc_options *ndopts)
+{
+ if (dev->ndisc_ops && dev->ndisc_ops->parse_options)
+ return dev->ndisc_ops->parse_options(dev, nd_opt, ndopts);
+ else
+ return 0;
+}
+
+static inline void ndisc_ops_update(const struct net_device *dev,
+ struct neighbour *n, u32 flags,
+ u8 icmp6_type,
+ const struct ndisc_options *ndopts)
+{
+ if (dev->ndisc_ops && dev->ndisc_ops->update)
+ dev->ndisc_ops->update(dev, n, flags, icmp6_type, ndopts);
+}
+
+static inline int ndisc_ops_opt_addr_space(const struct net_device *dev,
+ u8 icmp6_type)
+{
+ if (dev->ndisc_ops && dev->ndisc_ops->opt_addr_space &&
+ icmp6_type != NDISC_REDIRECT)
+ return dev->ndisc_ops->opt_addr_space(dev, icmp6_type, NULL,
+ NULL, NULL);
+ else
+ return 0;
+}
+
+static inline int ndisc_ops_redirect_opt_addr_space(const struct net_device *dev,
+ struct neighbour *neigh,
+ u8 *ha_buf, u8 **ha)
+{
+ if (dev->ndisc_ops && dev->ndisc_ops->opt_addr_space)
+ return dev->ndisc_ops->opt_addr_space(dev, NDISC_REDIRECT,
+ neigh, ha_buf, ha);
+ else
+ return 0;
+}
+
+static inline void ndisc_ops_fill_addr_option(const struct net_device *dev,
+ struct sk_buff *skb,
+ u8 icmp6_type)
+{
+ if (dev->ndisc_ops && dev->ndisc_ops->fill_addr_option &&
+ icmp6_type != NDISC_REDIRECT)
+ dev->ndisc_ops->fill_addr_option(dev, skb, icmp6_type, NULL);
+}
+
+static inline void ndisc_ops_fill_redirect_addr_option(const struct net_device *dev,
+ struct sk_buff *skb,
+ const u8 *ha)
+{
+ if (dev->ndisc_ops && dev->ndisc_ops->fill_addr_option)
+ dev->ndisc_ops->fill_addr_option(dev, skb, NDISC_REDIRECT, ha);
+}
+
+static inline void ndisc_ops_prefix_rcv_add_addr(struct net *net,
+ struct net_device *dev,
+ const struct prefix_info *pinfo,
+ struct inet6_dev *in6_dev,
+ struct in6_addr *addr,
+ int addr_type, u32 addr_flags,
+ bool sllao, bool tokenized,
+ __u32 valid_lft,
+ u32 prefered_lft,
+ bool dev_addr_generated)
+{
+ if (dev->ndisc_ops && dev->ndisc_ops->prefix_rcv_add_addr)
+ dev->ndisc_ops->prefix_rcv_add_addr(net, dev, pinfo, in6_dev,
+ addr, addr_type,
+ addr_flags, sllao,
+ tokenized, valid_lft,
+ prefered_lft,
+ dev_addr_generated);
+}
+#endif
+
/*
* Return the padding between the option length and the start of the
* link addr. Currently only IP-over-InfiniBand needs this, although
@@ -127,21 +319,46 @@
}
}
-static inline int ndisc_opt_addr_space(struct net_device *dev)
+static inline int __ndisc_opt_addr_space(unsigned char addr_len, int pad)
{
- return NDISC_OPT_SPACE(dev->addr_len +
- ndisc_addr_option_pad(dev->type));
+ return NDISC_OPT_SPACE(addr_len + pad);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static inline int ndisc_opt_addr_space(struct net_device *dev, u8 icmp6_type)
+{
+ return __ndisc_opt_addr_space(dev->addr_len,
+ ndisc_addr_option_pad(dev->type)) +
+ ndisc_ops_opt_addr_space(dev, icmp6_type);
+}
+
+static inline int ndisc_redirect_opt_addr_space(struct net_device *dev,
+ struct neighbour *neigh,
+ u8 *ops_data_buf,
+ u8 **ops_data)
+{
+ return __ndisc_opt_addr_space(dev->addr_len,
+ ndisc_addr_option_pad(dev->type)) +
+ ndisc_ops_redirect_opt_addr_space(dev, neigh, ops_data_buf,
+ ops_data);
+}
+#endif
+
+static inline u8 *__ndisc_opt_addr_data(struct nd_opt_hdr *p,
+ unsigned char addr_len, int prepad)
+{
+ u8 *lladdr = (u8 *)(p + 1);
+ int lladdrlen = p->nd_opt_len << 3;
+ if (lladdrlen != __ndisc_opt_addr_space(addr_len, prepad))
+ return NULL;
+ return lladdr + prepad;
}
static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p,
struct net_device *dev)
{
- u8 *lladdr = (u8 *)(p + 1);
- int lladdrlen = p->nd_opt_len << 3;
- int prepad = ndisc_addr_option_pad(dev->type);
- if (lladdrlen != ndisc_opt_addr_space(dev))
- return NULL;
- return lladdr + prepad;
+ return __ndisc_opt_addr_data(p, dev->addr_len,
+ ndisc_addr_option_pad(dev->type));
}
static inline u32 ndisc_hashfn(const void *pkey, const struct net_device *dev, __u32 *hash_rnd)
@@ -194,6 +411,9 @@
int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device *dev,
int dir);
+void ndisc_update(const struct net_device *dev, struct neighbour *neigh,
+ const u8 *lladdr, u8 new, u32 flags, u8 icmp6_type,
+ struct ndisc_options *ndopts);
/*
* IGMP
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 9a0d177..4f7cee8 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -683,19 +683,21 @@
return skb;
}
-static inline void __qdisc_reset_queue(struct Qdisc *sch,
- struct sk_buff_head *list)
+static inline void __qdisc_reset_queue(struct sk_buff_head *list)
{
/*
* We do not know the backlog in bytes of this list, it
* is up to the caller to correct it
*/
- __skb_queue_purge(list);
+ if (!skb_queue_empty(list)) {
+ rtnl_kfree_skbs(list->next, list->prev);
+ __skb_queue_head_init(list);
+ }
}
static inline void qdisc_reset_queue(struct Qdisc *sch)
{
- __qdisc_reset_queue(sch, &sch->q);
+ __qdisc_reset_queue(&sch->q);
sch->qstats.backlog = 0;
}
@@ -716,6 +718,12 @@
return old;
}
+static inline void rtnl_qdisc_drop(struct sk_buff *skb, struct Qdisc *sch)
+{
+ rtnl_kfree_skbs(skb, skb);
+ qdisc_qstats_drop(sch);
+}
+
static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch)
{
kfree_skb(skb);
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index af4de90..1046f55 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -113,6 +113,7 @@
IFLA_GRE_ENCAP_SPORT,
IFLA_GRE_ENCAP_DPORT,
IFLA_GRE_COLLECT_METADATA,
+ IFLA_GRE_IGNORE_DF,
__IFLA_GRE_MAX,
};
diff --git a/net/6lowpan/6lowpan_i.h b/net/6lowpan/6lowpan_i.h
index 97ecc27..a67caee 100644
--- a/net/6lowpan/6lowpan_i.h
+++ b/net/6lowpan/6lowpan_i.h
@@ -12,6 +12,10 @@
return lowpan_dev(dev)->lltype == lltype;
}
+extern const struct ndisc_ops lowpan_ndisc_ops;
+
+int addrconf_ifid_802154_6lowpan(u8 *eui, struct net_device *dev);
+
#ifdef CONFIG_6LOWPAN_DEBUGFS
int lowpan_dev_debugfs_init(struct net_device *dev);
void lowpan_dev_debugfs_exit(struct net_device *dev);
diff --git a/net/6lowpan/Makefile b/net/6lowpan/Makefile
index e44f3bf..12d131a 100644
--- a/net/6lowpan/Makefile
+++ b/net/6lowpan/Makefile
@@ -1,6 +1,6 @@
obj-$(CONFIG_6LOWPAN) += 6lowpan.o
-6lowpan-y := core.o iphc.o nhc.o
+6lowpan-y := core.o iphc.o nhc.o ndisc.o
6lowpan-$(CONFIG_6LOWPAN_DEBUGFS) += debugfs.o
#rfc6282 nhcs
diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c
index 7a240b3..5945f7e 100644
--- a/net/6lowpan/core.c
+++ b/net/6lowpan/core.c
@@ -14,6 +14,7 @@
#include <linux/module.h>
#include <net/6lowpan.h>
+#include <net/addrconf.h>
#include "6lowpan_i.h"
@@ -33,6 +34,8 @@
for (i = 0; i < LOWPAN_IPHC_CTX_TABLE_SIZE; i++)
lowpan_dev(dev)->ctx.table[i].id = i;
+ dev->ndisc_ops = &lowpan_ndisc_ops;
+
ret = register_netdevice(dev);
if (ret < 0)
return ret;
@@ -72,16 +75,61 @@
}
EXPORT_SYMBOL(lowpan_unregister_netdev);
+int addrconf_ifid_802154_6lowpan(u8 *eui, struct net_device *dev)
+{
+ struct wpan_dev *wpan_dev = lowpan_802154_dev(dev)->wdev->ieee802154_ptr;
+
+ /* Set short_addr autoconfiguration if short_addr is present only */
+ if (!lowpan_802154_is_valid_src_short_addr(wpan_dev->short_addr))
+ return -1;
+
+ /* For either address format, all zero addresses MUST NOT be used */
+ if (wpan_dev->pan_id == cpu_to_le16(0x0000) &&
+ wpan_dev->short_addr == cpu_to_le16(0x0000))
+ return -1;
+
+ /* Alternatively, if no PAN ID is known, 16 zero bits may be used */
+ if (wpan_dev->pan_id == cpu_to_le16(IEEE802154_PAN_ID_BROADCAST))
+ memset(eui, 0, 2);
+ else
+ ieee802154_le16_to_be16(eui, &wpan_dev->pan_id);
+
+ /* The "Universal/Local" (U/L) bit shall be set to zero */
+ eui[0] &= ~2;
+ eui[2] = 0;
+ eui[3] = 0xFF;
+ eui[4] = 0xFE;
+ eui[5] = 0;
+ ieee802154_le16_to_be16(&eui[6], &wpan_dev->short_addr);
+ return 0;
+}
+
static int lowpan_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct inet6_dev *idev;
+ struct in6_addr addr;
int i;
if (dev->type != ARPHRD_6LOWPAN)
return NOTIFY_DONE;
+ idev = __in6_dev_get(dev);
+ if (!idev)
+ return NOTIFY_DONE;
+
switch (event) {
+ case NETDEV_UP:
+ case NETDEV_CHANGE:
+ /* (802.15.4 6LoWPAN short address slaac handling */
+ if (lowpan_is_ll(dev, LOWPAN_LLTYPE_IEEE802154) &&
+ addrconf_ifid_802154_6lowpan(addr.s6_addr + 8, dev) == 0) {
+ __ipv6_addr_set_half(&addr.s6_addr32[0],
+ htonl(0xFE800000), 0);
+ addrconf_add_linklocal(idev, &addr, 0);
+ }
+ break;
case NETDEV_DOWN:
for (i = 0; i < LOWPAN_IPHC_CTX_TABLE_SIZE; i++)
clear_bit(LOWPAN_IPHC_CTX_FLAG_ACTIVE,
@@ -112,8 +160,6 @@
return ret;
}
- request_module_nowait("ipv6");
-
request_module_nowait("nhc_dest");
request_module_nowait("nhc_fragment");
request_module_nowait("nhc_hop");
diff --git a/net/6lowpan/debugfs.c b/net/6lowpan/debugfs.c
index acbaa3d..24915e0 100644
--- a/net/6lowpan/debugfs.c
+++ b/net/6lowpan/debugfs.c
@@ -245,6 +245,41 @@
.release = single_release,
};
+static int lowpan_short_addr_get(void *data, u64 *val)
+{
+ struct wpan_dev *wdev = data;
+
+ rtnl_lock();
+ *val = le16_to_cpu(wdev->short_addr);
+ rtnl_unlock();
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(lowpan_short_addr_fops, lowpan_short_addr_get,
+ NULL, "0x%04llx\n");
+
+static int lowpan_dev_debugfs_802154_init(const struct net_device *dev,
+ struct lowpan_dev *ldev)
+{
+ struct dentry *dentry, *root;
+
+ if (!lowpan_is_ll(dev, LOWPAN_LLTYPE_IEEE802154))
+ return 0;
+
+ root = debugfs_create_dir("ieee802154", ldev->iface_debugfs);
+ if (!root)
+ return -EINVAL;
+
+ dentry = debugfs_create_file("short_addr", 0444, root,
+ lowpan_802154_dev(dev)->wdev->ieee802154_ptr,
+ &lowpan_short_addr_fops);
+ if (!dentry)
+ return -EINVAL;
+
+ return 0;
+}
+
int lowpan_dev_debugfs_init(struct net_device *dev)
{
struct lowpan_dev *ldev = lowpan_dev(dev);
@@ -272,6 +307,10 @@
goto remove_root;
}
+ ret = lowpan_dev_debugfs_802154_init(dev, ldev);
+ if (ret < 0)
+ goto remove_root;
+
return 0;
remove_root:
diff --git a/net/6lowpan/iphc.c b/net/6lowpan/iphc.c
index 8501dd5..79f1fa2 100644
--- a/net/6lowpan/iphc.c
+++ b/net/6lowpan/iphc.c
@@ -761,22 +761,75 @@
[LOWPAN_IPHC_DAM_11] = LOWPAN_IPHC_SAM_11,
};
-static u8 lowpan_compress_ctx_addr(u8 **hc_ptr, const struct in6_addr *ipaddr,
+static inline bool
+lowpan_iphc_compress_ctx_802154_lladdr(const struct in6_addr *ipaddr,
+ const struct lowpan_iphc_ctx *ctx,
+ const void *lladdr)
+{
+ const struct ieee802154_addr *addr = lladdr;
+ unsigned char extended_addr[EUI64_ADDR_LEN];
+ bool lladdr_compress = false;
+ struct in6_addr tmp = {};
+
+ switch (addr->mode) {
+ case IEEE802154_ADDR_LONG:
+ ieee802154_le64_to_be64(&extended_addr, &addr->extended_addr);
+ /* check for SAM/DAM = 11 */
+ memcpy(&tmp.s6_addr[8], &extended_addr, EUI64_ADDR_LEN);
+ /* second bit-flip (Universe/Local) is done according RFC2464 */
+ tmp.s6_addr[8] ^= 0x02;
+ /* context information are always used */
+ ipv6_addr_prefix_copy(&tmp, &ctx->pfx, ctx->plen);
+ if (ipv6_addr_equal(&tmp, ipaddr))
+ lladdr_compress = true;
+ break;
+ case IEEE802154_ADDR_SHORT:
+ tmp.s6_addr[11] = 0xFF;
+ tmp.s6_addr[12] = 0xFE;
+ ieee802154_le16_to_be16(&tmp.s6_addr16[7],
+ &addr->short_addr);
+ /* context information are always used */
+ ipv6_addr_prefix_copy(&tmp, &ctx->pfx, ctx->plen);
+ if (ipv6_addr_equal(&tmp, ipaddr))
+ lladdr_compress = true;
+ break;
+ default:
+ /* should never handled and filtered by 802154 6lowpan */
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ return lladdr_compress;
+}
+
+static u8 lowpan_compress_ctx_addr(u8 **hc_ptr, const struct net_device *dev,
+ const struct in6_addr *ipaddr,
const struct lowpan_iphc_ctx *ctx,
const unsigned char *lladdr, bool sam)
{
struct in6_addr tmp = {};
u8 dam;
- /* check for SAM/DAM = 11 */
- memcpy(&tmp.s6_addr[8], lladdr, 8);
- /* second bit-flip (Universe/Local) is done according RFC2464 */
- tmp.s6_addr[8] ^= 0x02;
- /* context information are always used */
- ipv6_addr_prefix_copy(&tmp, &ctx->pfx, ctx->plen);
- if (ipv6_addr_equal(&tmp, ipaddr)) {
- dam = LOWPAN_IPHC_DAM_11;
- goto out;
+ switch (lowpan_dev(dev)->lltype) {
+ case LOWPAN_LLTYPE_IEEE802154:
+ if (lowpan_iphc_compress_ctx_802154_lladdr(ipaddr, ctx,
+ lladdr)) {
+ dam = LOWPAN_IPHC_DAM_11;
+ goto out;
+ }
+ break;
+ default:
+ /* check for SAM/DAM = 11 */
+ memcpy(&tmp.s6_addr[8], lladdr, EUI64_ADDR_LEN);
+ /* second bit-flip (Universe/Local) is done according RFC2464 */
+ tmp.s6_addr[8] ^= 0x02;
+ /* context information are always used */
+ ipv6_addr_prefix_copy(&tmp, &ctx->pfx, ctx->plen);
+ if (ipv6_addr_equal(&tmp, ipaddr)) {
+ dam = LOWPAN_IPHC_DAM_11;
+ goto out;
+ }
+ break;
}
memset(&tmp, 0, sizeof(tmp));
@@ -813,28 +866,85 @@
return dam;
}
-static u8 lowpan_compress_addr_64(u8 **hc_ptr, const struct in6_addr *ipaddr,
+static inline bool
+lowpan_iphc_compress_802154_lladdr(const struct in6_addr *ipaddr,
+ const void *lladdr)
+{
+ const struct ieee802154_addr *addr = lladdr;
+ unsigned char extended_addr[EUI64_ADDR_LEN];
+ bool lladdr_compress = false;
+ struct in6_addr tmp = {};
+
+ switch (addr->mode) {
+ case IEEE802154_ADDR_LONG:
+ ieee802154_le64_to_be64(&extended_addr, &addr->extended_addr);
+ if (is_addr_mac_addr_based(ipaddr, extended_addr))
+ lladdr_compress = true;
+ break;
+ case IEEE802154_ADDR_SHORT:
+ /* fe:80::ff:fe00:XXXX
+ * \__/
+ * short_addr
+ *
+ * Universe/Local bit is zero.
+ */
+ tmp.s6_addr[0] = 0xFE;
+ tmp.s6_addr[1] = 0x80;
+ tmp.s6_addr[11] = 0xFF;
+ tmp.s6_addr[12] = 0xFE;
+ ieee802154_le16_to_be16(&tmp.s6_addr16[7],
+ &addr->short_addr);
+ if (ipv6_addr_equal(&tmp, ipaddr))
+ lladdr_compress = true;
+ break;
+ default:
+ /* should never handled and filtered by 802154 6lowpan */
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ return lladdr_compress;
+}
+
+static u8 lowpan_compress_addr_64(u8 **hc_ptr, const struct net_device *dev,
+ const struct in6_addr *ipaddr,
const unsigned char *lladdr, bool sam)
{
- u8 dam = LOWPAN_IPHC_DAM_00;
+ u8 dam = LOWPAN_IPHC_DAM_01;
- if (is_addr_mac_addr_based(ipaddr, lladdr)) {
- dam = LOWPAN_IPHC_DAM_11; /* 0-bits */
- pr_debug("address compression 0 bits\n");
- } else if (lowpan_is_iid_16_bit_compressable(ipaddr)) {
+ switch (lowpan_dev(dev)->lltype) {
+ case LOWPAN_LLTYPE_IEEE802154:
+ if (lowpan_iphc_compress_802154_lladdr(ipaddr, lladdr)) {
+ dam = LOWPAN_IPHC_DAM_11; /* 0-bits */
+ pr_debug("address compression 0 bits\n");
+ goto out;
+ }
+ break;
+ default:
+ if (is_addr_mac_addr_based(ipaddr, lladdr)) {
+ dam = LOWPAN_IPHC_DAM_11; /* 0-bits */
+ pr_debug("address compression 0 bits\n");
+ goto out;
+ }
+ break;
+ }
+
+ if (lowpan_is_iid_16_bit_compressable(ipaddr)) {
/* compress IID to 16 bits xxxx::XXXX */
lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr16[7], 2);
dam = LOWPAN_IPHC_DAM_10; /* 16-bits */
raw_dump_inline(NULL, "Compressed ipv6 addr is (16 bits)",
*hc_ptr - 2, 2);
- } else {
- /* do not compress IID => xxxx::IID */
- lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr16[4], 8);
- dam = LOWPAN_IPHC_DAM_01; /* 64-bits */
- raw_dump_inline(NULL, "Compressed ipv6 addr is (64 bits)",
- *hc_ptr - 8, 8);
+ goto out;
}
+ /* do not compress IID => xxxx::IID */
+ lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr16[4], 8);
+ raw_dump_inline(NULL, "Compressed ipv6 addr is (64 bits)",
+ *hc_ptr - 8, 8);
+
+out:
+
if (sam)
return lowpan_iphc_dam_to_sam_value[dam];
else
@@ -1013,9 +1123,6 @@
iphc0 = LOWPAN_DISPATCH_IPHC;
iphc1 = 0;
- raw_dump_inline(__func__, "saddr", saddr, EUI64_ADDR_LEN);
- raw_dump_inline(__func__, "daddr", daddr, EUI64_ADDR_LEN);
-
raw_dump_table(__func__, "sending raw skb network uncompressed packet",
skb->data, skb->len);
@@ -1088,14 +1195,15 @@
iphc1 |= LOWPAN_IPHC_SAC;
} else {
if (sci) {
- iphc1 |= lowpan_compress_ctx_addr(&hc_ptr, &hdr->saddr,
+ iphc1 |= lowpan_compress_ctx_addr(&hc_ptr, dev,
+ &hdr->saddr,
&sci_entry, saddr,
true);
iphc1 |= LOWPAN_IPHC_SAC;
} else {
if (ipv6_saddr_type & IPV6_ADDR_LINKLOCAL &&
lowpan_is_linklocal_zero_padded(hdr->saddr)) {
- iphc1 |= lowpan_compress_addr_64(&hc_ptr,
+ iphc1 |= lowpan_compress_addr_64(&hc_ptr, dev,
&hdr->saddr,
saddr, true);
pr_debug("source address unicast link-local %pI6c iphc1 0x%02x\n",
@@ -1123,14 +1231,15 @@
}
} else {
if (dci) {
- iphc1 |= lowpan_compress_ctx_addr(&hc_ptr, &hdr->daddr,
+ iphc1 |= lowpan_compress_ctx_addr(&hc_ptr, dev,
+ &hdr->daddr,
&dci_entry, daddr,
false);
iphc1 |= LOWPAN_IPHC_DAC;
} else {
if (ipv6_daddr_type & IPV6_ADDR_LINKLOCAL &&
lowpan_is_linklocal_zero_padded(hdr->daddr)) {
- iphc1 |= lowpan_compress_addr_64(&hc_ptr,
+ iphc1 |= lowpan_compress_addr_64(&hc_ptr, dev,
&hdr->daddr,
daddr, false);
pr_debug("dest address unicast link-local %pI6c iphc1 0x%02x\n",
diff --git a/net/6lowpan/ndisc.c b/net/6lowpan/ndisc.c
new file mode 100644
index 0000000..ae1d419
--- /dev/null
+++ b/net/6lowpan/ndisc.c
@@ -0,0 +1,234 @@
+/* This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Authors:
+ * (C) 2016 Pengutronix, Alexander Aring <aar@pengutronix.de>
+ */
+
+#include <net/6lowpan.h>
+#include <net/addrconf.h>
+#include <net/ndisc.h>
+
+#include "6lowpan_i.h"
+
+static int lowpan_ndisc_is_useropt(u8 nd_opt_type)
+{
+ return nd_opt_type == ND_OPT_6CO;
+}
+
+#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN)
+#define NDISC_802154_SHORT_ADDR_LENGTH 1
+static int lowpan_ndisc_parse_802154_options(const struct net_device *dev,
+ struct nd_opt_hdr *nd_opt,
+ struct ndisc_options *ndopts)
+{
+ switch (nd_opt->nd_opt_len) {
+ case NDISC_802154_SHORT_ADDR_LENGTH:
+ if (ndopts->nd_802154_opt_array[nd_opt->nd_opt_type])
+ ND_PRINTK(2, warn,
+ "%s: duplicated short addr ND6 option found: type=%d\n",
+ __func__, nd_opt->nd_opt_type);
+ else
+ ndopts->nd_802154_opt_array[nd_opt->nd_opt_type] = nd_opt;
+ return 1;
+ default:
+ /* all others will be handled by ndisc IPv6 option parsing */
+ return 0;
+ }
+}
+
+static int lowpan_ndisc_parse_options(const struct net_device *dev,
+ struct nd_opt_hdr *nd_opt,
+ struct ndisc_options *ndopts)
+{
+ switch (nd_opt->nd_opt_type) {
+ case ND_OPT_SOURCE_LL_ADDR:
+ case ND_OPT_TARGET_LL_ADDR:
+ return lowpan_ndisc_parse_802154_options(dev, nd_opt, ndopts);
+ default:
+ return 0;
+ }
+}
+
+static void lowpan_ndisc_802154_update(struct neighbour *n, u32 flags,
+ u8 icmp6_type,
+ const struct ndisc_options *ndopts)
+{
+ struct lowpan_802154_neigh *neigh = lowpan_802154_neigh(neighbour_priv(n));
+ u8 *lladdr_short = NULL;
+
+ switch (icmp6_type) {
+ case NDISC_ROUTER_SOLICITATION:
+ case NDISC_ROUTER_ADVERTISEMENT:
+ case NDISC_NEIGHBOUR_SOLICITATION:
+ if (ndopts->nd_802154_opts_src_lladdr) {
+ lladdr_short = __ndisc_opt_addr_data(ndopts->nd_802154_opts_src_lladdr,
+ IEEE802154_SHORT_ADDR_LEN, 0);
+ if (!lladdr_short) {
+ ND_PRINTK(2, warn,
+ "NA: invalid short link-layer address length\n");
+ return;
+ }
+ }
+ break;
+ case NDISC_REDIRECT:
+ case NDISC_NEIGHBOUR_ADVERTISEMENT:
+ if (ndopts->nd_802154_opts_tgt_lladdr) {
+ lladdr_short = __ndisc_opt_addr_data(ndopts->nd_802154_opts_tgt_lladdr,
+ IEEE802154_SHORT_ADDR_LEN, 0);
+ if (!lladdr_short) {
+ ND_PRINTK(2, warn,
+ "NA: invalid short link-layer address length\n");
+ return;
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ write_lock_bh(&n->lock);
+ if (lladdr_short)
+ ieee802154_be16_to_le16(&neigh->short_addr, lladdr_short);
+ else
+ neigh->short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC);
+ write_unlock_bh(&n->lock);
+}
+
+static void lowpan_ndisc_update(const struct net_device *dev,
+ struct neighbour *n, u32 flags, u8 icmp6_type,
+ const struct ndisc_options *ndopts)
+{
+ if (!lowpan_is_ll(dev, LOWPAN_LLTYPE_IEEE802154))
+ return;
+
+ /* react on overrides only. TODO check if this is really right. */
+ if (flags & NEIGH_UPDATE_F_OVERRIDE)
+ lowpan_ndisc_802154_update(n, flags, icmp6_type, ndopts);
+}
+
+static int lowpan_ndisc_opt_addr_space(const struct net_device *dev,
+ u8 icmp6_type, struct neighbour *neigh,
+ u8 *ha_buf, u8 **ha)
+{
+ struct lowpan_802154_neigh *n;
+ struct wpan_dev *wpan_dev;
+ int addr_space = 0;
+
+ if (!lowpan_is_ll(dev, LOWPAN_LLTYPE_IEEE802154))
+ return 0;
+
+ switch (icmp6_type) {
+ case NDISC_REDIRECT:
+ n = lowpan_802154_neigh(neighbour_priv(neigh));
+
+ read_lock_bh(&neigh->lock);
+ if (lowpan_802154_is_valid_src_short_addr(n->short_addr)) {
+ memcpy(ha_buf, &n->short_addr,
+ IEEE802154_SHORT_ADDR_LEN);
+ read_unlock_bh(&neigh->lock);
+ addr_space += __ndisc_opt_addr_space(IEEE802154_SHORT_ADDR_LEN, 0);
+ *ha = ha_buf;
+ }
+ read_unlock_bh(&neigh->lock);
+ break;
+ case NDISC_NEIGHBOUR_ADVERTISEMENT:
+ case NDISC_NEIGHBOUR_SOLICITATION:
+ case NDISC_ROUTER_SOLICITATION:
+ wpan_dev = lowpan_802154_dev(dev)->wdev->ieee802154_ptr;
+
+ if (lowpan_802154_is_valid_src_short_addr(wpan_dev->short_addr))
+ addr_space = __ndisc_opt_addr_space(IEEE802154_SHORT_ADDR_LEN, 0);
+ break;
+ default:
+ break;
+ }
+
+ return addr_space;
+}
+
+static void lowpan_ndisc_fill_addr_option(const struct net_device *dev,
+ struct sk_buff *skb, u8 icmp6_type,
+ const u8 *ha)
+{
+ struct wpan_dev *wpan_dev;
+ __be16 short_addr;
+ u8 opt_type;
+
+ if (!lowpan_is_ll(dev, LOWPAN_LLTYPE_IEEE802154))
+ return;
+
+ switch (icmp6_type) {
+ case NDISC_REDIRECT:
+ if (ha) {
+ ieee802154_le16_to_be16(&short_addr, ha);
+ __ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR,
+ &short_addr,
+ IEEE802154_SHORT_ADDR_LEN, 0);
+ }
+ return;
+ case NDISC_NEIGHBOUR_ADVERTISEMENT:
+ opt_type = ND_OPT_TARGET_LL_ADDR;
+ break;
+ case NDISC_ROUTER_SOLICITATION:
+ case NDISC_NEIGHBOUR_SOLICITATION:
+ opt_type = ND_OPT_SOURCE_LL_ADDR;
+ break;
+ default:
+ return;
+ }
+
+ wpan_dev = lowpan_802154_dev(dev)->wdev->ieee802154_ptr;
+
+ if (lowpan_802154_is_valid_src_short_addr(wpan_dev->short_addr)) {
+ ieee802154_le16_to_be16(&short_addr,
+ &wpan_dev->short_addr);
+ __ndisc_fill_addr_option(skb, opt_type, &short_addr,
+ IEEE802154_SHORT_ADDR_LEN, 0);
+ }
+}
+
+static void lowpan_ndisc_prefix_rcv_add_addr(struct net *net,
+ struct net_device *dev,
+ const struct prefix_info *pinfo,
+ struct inet6_dev *in6_dev,
+ struct in6_addr *addr,
+ int addr_type, u32 addr_flags,
+ bool sllao, bool tokenized,
+ __u32 valid_lft,
+ u32 prefered_lft,
+ bool dev_addr_generated)
+{
+ int err;
+
+ /* generates short based address for RA PIO's */
+ if (lowpan_is_ll(dev, LOWPAN_LLTYPE_IEEE802154) && dev_addr_generated &&
+ !addrconf_ifid_802154_6lowpan(addr->s6_addr + 8, dev)) {
+ err = addrconf_prefix_rcv_add_addr(net, dev, pinfo, in6_dev,
+ addr, addr_type, addr_flags,
+ sllao, tokenized, valid_lft,
+ prefered_lft);
+ if (err)
+ ND_PRINTK(2, warn,
+ "RA: could not add a short address based address for prefix: %pI6c\n",
+ &pinfo->prefix);
+ }
+}
+#endif
+
+const struct ndisc_ops lowpan_ndisc_ops = {
+ .is_useropt = lowpan_ndisc_is_useropt,
+#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN)
+ .parse_options = lowpan_ndisc_parse_options,
+ .update = lowpan_ndisc_update,
+ .opt_addr_space = lowpan_ndisc_opt_addr_space,
+ .fill_addr_option = lowpan_ndisc_fill_addr_option,
+ .prefix_rcv_add_addr = lowpan_ndisc_prefix_rcv_add_addr,
+#endif
+};
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d69c464..eb49ca2 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -71,9 +71,31 @@
}
EXPORT_SYMBOL(rtnl_lock);
+static struct sk_buff *defer_kfree_skb_list;
+void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail)
+{
+ if (head && tail) {
+ tail->next = defer_kfree_skb_list;
+ defer_kfree_skb_list = head;
+ }
+}
+EXPORT_SYMBOL(rtnl_kfree_skbs);
+
void __rtnl_unlock(void)
{
+ struct sk_buff *head = defer_kfree_skb_list;
+
+ defer_kfree_skb_list = NULL;
+
mutex_unlock(&rtnl_mutex);
+
+ while (head) {
+ struct sk_buff *next = head->next;
+
+ kfree_skb(head);
+ cond_resched();
+ head = next;
+ }
}
void rtnl_unlock(void)
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 4e2b308..8c004a0 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -81,11 +81,21 @@
return 0;
}
+static int lowpan_neigh_construct(struct neighbour *n)
+{
+ struct lowpan_802154_neigh *neigh = lowpan_802154_neigh(neighbour_priv(n));
+
+ /* default no short_addr is available for a neighbour */
+ neigh->short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC);
+ return 0;
+}
+
static const struct net_device_ops lowpan_netdev_ops = {
.ndo_init = lowpan_dev_init,
.ndo_start_xmit = lowpan_xmit,
.ndo_open = lowpan_open,
.ndo_stop = lowpan_stop,
+ .ndo_neigh_construct = lowpan_neigh_construct,
};
static void lowpan_setup(struct net_device *ldev)
@@ -150,6 +160,8 @@
wdev->needed_headroom;
ldev->needed_tailroom = wdev->needed_tailroom;
+ ldev->neigh_priv_len = sizeof(struct lowpan_802154_neigh);
+
ret = lowpan_register_netdevice(ldev, LOWPAN_LLTYPE_IEEE802154);
if (ret < 0) {
dev_put(wdev);
diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c
index e459afd..dbb476d 100644
--- a/net/ieee802154/6lowpan/tx.c
+++ b/net/ieee802154/6lowpan/tx.c
@@ -9,6 +9,7 @@
*/
#include <net/6lowpan.h>
+#include <net/ndisc.h>
#include <net/ieee802154_netdev.h>
#include <net/mac802154.h>
@@ -17,19 +18,9 @@
#define LOWPAN_FRAG1_HEAD_SIZE 0x4
#define LOWPAN_FRAGN_HEAD_SIZE 0x5
-/* don't save pan id, it's intra pan */
-struct lowpan_addr {
- u8 mode;
- union {
- /* IPv6 needs big endian here */
- __be64 extended_addr;
- __be16 short_addr;
- } u;
-};
-
struct lowpan_addr_info {
- struct lowpan_addr daddr;
- struct lowpan_addr saddr;
+ struct ieee802154_addr daddr;
+ struct ieee802154_addr saddr;
};
static inline struct
@@ -48,12 +39,14 @@
* RAW/DGRAM sockets.
*/
int lowpan_header_create(struct sk_buff *skb, struct net_device *ldev,
- unsigned short type, const void *_daddr,
- const void *_saddr, unsigned int len)
+ unsigned short type, const void *daddr,
+ const void *saddr, unsigned int len)
{
- const u8 *saddr = _saddr;
- const u8 *daddr = _daddr;
- struct lowpan_addr_info *info;
+ struct wpan_dev *wpan_dev = lowpan_802154_dev(ldev)->wdev->ieee802154_ptr;
+ struct lowpan_addr_info *info = lowpan_skb_priv(skb);
+ struct lowpan_802154_neigh *llneigh = NULL;
+ const struct ipv6hdr *hdr = ipv6_hdr(skb);
+ struct neighbour *n;
/* TODO:
* if this package isn't ipv6 one, where should it be routed?
@@ -61,21 +54,50 @@
if (type != ETH_P_IPV6)
return 0;
- if (!saddr)
- saddr = ldev->dev_addr;
+ /* intra-pan communication */
+ info->saddr.pan_id = wpan_dev->pan_id;
+ info->daddr.pan_id = info->saddr.pan_id;
- raw_dump_inline(__func__, "saddr", (unsigned char *)saddr, 8);
- raw_dump_inline(__func__, "daddr", (unsigned char *)daddr, 8);
+ if (!memcmp(daddr, ldev->broadcast, EUI64_ADDR_LEN)) {
+ info->daddr.short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST);
+ info->daddr.mode = IEEE802154_ADDR_SHORT;
+ } else {
+ __le16 short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC);
- info = lowpan_skb_priv(skb);
+ n = neigh_lookup(&nd_tbl, &hdr->daddr, ldev);
+ if (n) {
+ llneigh = lowpan_802154_neigh(neighbour_priv(n));
+ read_lock_bh(&n->lock);
+ short_addr = llneigh->short_addr;
+ read_unlock_bh(&n->lock);
+ }
- /* TODO: Currently we only support extended_addr */
- info->daddr.mode = IEEE802154_ADDR_LONG;
- memcpy(&info->daddr.u.extended_addr, daddr,
- sizeof(info->daddr.u.extended_addr));
- info->saddr.mode = IEEE802154_ADDR_LONG;
- memcpy(&info->saddr.u.extended_addr, saddr,
- sizeof(info->daddr.u.extended_addr));
+ if (llneigh &&
+ lowpan_802154_is_valid_src_short_addr(short_addr)) {
+ info->daddr.short_addr = short_addr;
+ info->daddr.mode = IEEE802154_ADDR_SHORT;
+ } else {
+ info->daddr.mode = IEEE802154_ADDR_LONG;
+ ieee802154_be64_to_le64(&info->daddr.extended_addr,
+ daddr);
+ }
+
+ if (n)
+ neigh_release(n);
+ }
+
+ if (!saddr) {
+ if (lowpan_802154_is_valid_src_short_addr(wpan_dev->short_addr)) {
+ info->saddr.mode = IEEE802154_ADDR_SHORT;
+ info->saddr.short_addr = wpan_dev->short_addr;
+ } else {
+ info->saddr.mode = IEEE802154_ADDR_LONG;
+ info->saddr.extended_addr = wpan_dev->extended_addr;
+ }
+ } else {
+ info->saddr.mode = IEEE802154_ADDR_LONG;
+ ieee802154_be64_to_le64(&info->saddr.extended_addr, saddr);
+ }
return 0;
}
@@ -209,47 +231,26 @@
u16 *dgram_size, u16 *dgram_offset)
{
struct wpan_dev *wpan_dev = lowpan_802154_dev(ldev)->wdev->ieee802154_ptr;
- struct ieee802154_addr sa, da;
struct ieee802154_mac_cb *cb = mac_cb_init(skb);
struct lowpan_addr_info info;
- void *daddr, *saddr;
memcpy(&info, lowpan_skb_priv(skb), sizeof(info));
- /* TODO: Currently we only support extended_addr */
- daddr = &info.daddr.u.extended_addr;
- saddr = &info.saddr.u.extended_addr;
-
*dgram_size = skb->len;
- lowpan_header_compress(skb, ldev, daddr, saddr);
+ lowpan_header_compress(skb, ldev, &info.daddr, &info.saddr);
/* dgram_offset = (saved bytes after compression) + lowpan header len */
*dgram_offset = (*dgram_size - skb->len) + skb_network_header_len(skb);
cb->type = IEEE802154_FC_TYPE_DATA;
- /* prepare wpan address data */
- sa.mode = IEEE802154_ADDR_LONG;
- sa.pan_id = wpan_dev->pan_id;
- sa.extended_addr = ieee802154_devaddr_from_raw(saddr);
-
- /* intra-PAN communications */
- da.pan_id = sa.pan_id;
-
- /* if the destination address is the broadcast address, use the
- * corresponding short address
- */
- if (!memcmp(daddr, ldev->broadcast, EUI64_ADDR_LEN)) {
- da.mode = IEEE802154_ADDR_SHORT;
- da.short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST);
+ if (info.daddr.mode == IEEE802154_ADDR_SHORT &&
+ ieee802154_is_broadcast_short_addr(info.daddr.short_addr))
cb->ackreq = false;
- } else {
- da.mode = IEEE802154_ADDR_LONG;
- da.extended_addr = ieee802154_devaddr_from_raw(daddr);
+ else
cb->ackreq = wpan_dev->ackreq;
- }
- return wpan_dev_hard_header(skb, lowpan_802154_dev(ldev)->wdev, &da,
- &sa, 0);
+ return wpan_dev_hard_header(skb, lowpan_802154_dev(ldev)->wdev,
+ &info.daddr, &info.saddr, 0);
}
netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *ldev)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 4d2025f..0f8ca3f 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -841,17 +841,19 @@
return ipgre_tunnel_validate(tb, data);
}
-static void ipgre_netlink_parms(struct net_device *dev,
+static int ipgre_netlink_parms(struct net_device *dev,
struct nlattr *data[],
struct nlattr *tb[],
struct ip_tunnel_parm *parms)
{
+ struct ip_tunnel *t = netdev_priv(dev);
+
memset(parms, 0, sizeof(*parms));
parms->iph.protocol = IPPROTO_GRE;
if (!data)
- return;
+ return 0;
if (data[IFLA_GRE_LINK])
parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
@@ -880,16 +882,26 @@
if (data[IFLA_GRE_TOS])
parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
- if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
+ if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
+ if (t->ignore_df)
+ return -EINVAL;
parms->iph.frag_off = htons(IP_DF);
+ }
if (data[IFLA_GRE_COLLECT_METADATA]) {
- struct ip_tunnel *t = netdev_priv(dev);
-
t->collect_md = true;
if (dev->type == ARPHRD_IPGRE)
dev->type = ARPHRD_NONE;
}
+
+ if (data[IFLA_GRE_IGNORE_DF]) {
+ if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
+ && (parms->iph.frag_off & htons(IP_DF)))
+ return -EINVAL;
+ t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
+ }
+
+ return 0;
}
/* This function returns true when ENCAP attributes are present in the nl msg */
@@ -960,16 +972,19 @@
{
struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
+ int err;
if (ipgre_netlink_encap_parms(data, &ipencap)) {
struct ip_tunnel *t = netdev_priv(dev);
- int err = ip_tunnel_encap_setup(t, &ipencap);
+ err = ip_tunnel_encap_setup(t, &ipencap);
if (err < 0)
return err;
}
- ipgre_netlink_parms(dev, data, tb, &p);
+ err = ipgre_netlink_parms(dev, data, tb, &p);
+ if (err < 0)
+ return err;
return ip_tunnel_newlink(dev, tb, &p);
}
@@ -978,16 +993,19 @@
{
struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
+ int err;
if (ipgre_netlink_encap_parms(data, &ipencap)) {
struct ip_tunnel *t = netdev_priv(dev);
- int err = ip_tunnel_encap_setup(t, &ipencap);
+ err = ip_tunnel_encap_setup(t, &ipencap);
if (err < 0)
return err;
}
- ipgre_netlink_parms(dev, data, tb, &p);
+ err = ipgre_netlink_parms(dev, data, tb, &p);
+ if (err < 0)
+ return err;
return ip_tunnel_changelink(dev, tb, &p);
}
@@ -1024,6 +1042,8 @@
nla_total_size(2) +
/* IFLA_GRE_COLLECT_METADATA */
nla_total_size(0) +
+ /* IFLA_GRE_IGNORE_DF */
+ nla_total_size(1) +
0;
}
@@ -1057,6 +1077,9 @@
t->encap.flags))
goto nla_put_failure;
+ if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
+ goto nla_put_failure;
+
if (t->collect_md) {
if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
goto nla_put_failure;
@@ -1084,6 +1107,7 @@
[IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 },
[IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
[IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
+ [IFLA_GRE_IGNORE_DF] = { .type = NLA_U8 },
};
static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index d8f5e0a..95649eb 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -682,7 +682,7 @@
}
df = tnl_params->frag_off;
- if (skb->protocol == htons(ETH_P_IP))
+ if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
df |= (inner_iph->frag_off&htons(IP_DF));
max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 47f837a..6c8fc3f 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2254,7 +2254,7 @@
return ERR_PTR(-EACCES);
/* Add default multicast route */
- if (!(dev->flags & IFF_LOOPBACK))
+ if (!(dev->flags & IFF_LOOPBACK) && !netif_is_l3_master(dev))
addrconf_add_mroute(dev);
return idev;
@@ -2333,12 +2333,109 @@
idev->addr_gen_mode == IN6_ADDR_GEN_MODE_RANDOM;
}
+int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
+ const struct prefix_info *pinfo,
+ struct inet6_dev *in6_dev,
+ const struct in6_addr *addr, int addr_type,
+ u32 addr_flags, bool sllao, bool tokenized,
+ __u32 valid_lft, u32 prefered_lft)
+{
+ struct inet6_ifaddr *ifp = ipv6_get_ifaddr(net, addr, dev, 1);
+ int create = 0, update_lft = 0;
+
+ if (!ifp && valid_lft) {
+ int max_addresses = in6_dev->cnf.max_addresses;
+
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ if (in6_dev->cnf.optimistic_dad &&
+ !net->ipv6.devconf_all->forwarding && sllao)
+ addr_flags |= IFA_F_OPTIMISTIC;
+#endif
+
+ /* Do not allow to create too much of autoconfigured
+ * addresses; this would be too easy way to crash kernel.
+ */
+ if (!max_addresses ||
+ ipv6_count_addresses(in6_dev) < max_addresses)
+ ifp = ipv6_add_addr(in6_dev, addr, NULL,
+ pinfo->prefix_len,
+ addr_type&IPV6_ADDR_SCOPE_MASK,
+ addr_flags, valid_lft,
+ prefered_lft);
+
+ if (IS_ERR_OR_NULL(ifp))
+ return -1;
+
+ update_lft = 0;
+ create = 1;
+ spin_lock_bh(&ifp->lock);
+ ifp->flags |= IFA_F_MANAGETEMPADDR;
+ ifp->cstamp = jiffies;
+ ifp->tokenized = tokenized;
+ spin_unlock_bh(&ifp->lock);
+ addrconf_dad_start(ifp);
+ }
+
+ if (ifp) {
+ u32 flags;
+ unsigned long now;
+ u32 stored_lft;
+
+ /* update lifetime (RFC2462 5.5.3 e) */
+ spin_lock_bh(&ifp->lock);
+ now = jiffies;
+ if (ifp->valid_lft > (now - ifp->tstamp) / HZ)
+ stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
+ else
+ stored_lft = 0;
+ if (!update_lft && !create && stored_lft) {
+ const u32 minimum_lft = min_t(u32,
+ stored_lft, MIN_VALID_LIFETIME);
+ valid_lft = max(valid_lft, minimum_lft);
+
+ /* RFC4862 Section 5.5.3e:
+ * "Note that the preferred lifetime of the
+ * corresponding address is always reset to
+ * the Preferred Lifetime in the received
+ * Prefix Information option, regardless of
+ * whether the valid lifetime is also reset or
+ * ignored."
+ *
+ * So we should always update prefered_lft here.
+ */
+ update_lft = 1;
+ }
+
+ if (update_lft) {
+ ifp->valid_lft = valid_lft;
+ ifp->prefered_lft = prefered_lft;
+ ifp->tstamp = now;
+ flags = ifp->flags;
+ ifp->flags &= ~IFA_F_DEPRECATED;
+ spin_unlock_bh(&ifp->lock);
+
+ if (!(flags&IFA_F_TENTATIVE))
+ ipv6_ifa_notify(0, ifp);
+ } else
+ spin_unlock_bh(&ifp->lock);
+
+ manage_tempaddrs(in6_dev, ifp, valid_lft, prefered_lft,
+ create, now);
+
+ in6_ifa_put(ifp);
+ addrconf_verify();
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(addrconf_prefix_rcv_add_addr);
+
void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
{
struct prefix_info *pinfo;
__u32 valid_lft;
__u32 prefered_lft;
- int addr_type;
+ int addr_type, err;
u32 addr_flags = 0;
struct inet6_dev *in6_dev;
struct net *net = dev_net(dev);
@@ -2432,10 +2529,8 @@
/* Try to figure out our local address for this prefix */
if (pinfo->autoconf && in6_dev->cnf.autoconf) {
- struct inet6_ifaddr *ifp;
struct in6_addr addr;
- int create = 0, update_lft = 0;
- bool tokenized = false;
+ bool tokenized = false, dev_addr_generated = false;
if (pinfo->prefix_len == 64) {
memcpy(&addr, &pinfo->prefix, 8);
@@ -2453,106 +2548,36 @@
goto ok;
} else if (ipv6_generate_eui64(addr.s6_addr + 8, dev) &&
ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) {
- in6_dev_put(in6_dev);
- return;
+ goto put;
+ } else {
+ dev_addr_generated = true;
}
goto ok;
}
net_dbg_ratelimited("IPv6 addrconf: prefix with wrong length %d\n",
pinfo->prefix_len);
- in6_dev_put(in6_dev);
- return;
+ goto put;
ok:
+ err = addrconf_prefix_rcv_add_addr(net, dev, pinfo, in6_dev,
+ &addr, addr_type,
+ addr_flags, sllao,
+ tokenized, valid_lft,
+ prefered_lft);
+ if (err)
+ goto put;
- ifp = ipv6_get_ifaddr(net, &addr, dev, 1);
-
- if (!ifp && valid_lft) {
- int max_addresses = in6_dev->cnf.max_addresses;
-
-#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
- if (in6_dev->cnf.optimistic_dad &&
- !net->ipv6.devconf_all->forwarding && sllao)
- addr_flags |= IFA_F_OPTIMISTIC;
-#endif
-
- /* Do not allow to create too much of autoconfigured
- * addresses; this would be too easy way to crash kernel.
- */
- if (!max_addresses ||
- ipv6_count_addresses(in6_dev) < max_addresses)
- ifp = ipv6_add_addr(in6_dev, &addr, NULL,
- pinfo->prefix_len,
- addr_type&IPV6_ADDR_SCOPE_MASK,
- addr_flags, valid_lft,
- prefered_lft);
-
- if (IS_ERR_OR_NULL(ifp)) {
- in6_dev_put(in6_dev);
- return;
- }
-
- update_lft = 0;
- create = 1;
- spin_lock_bh(&ifp->lock);
- ifp->flags |= IFA_F_MANAGETEMPADDR;
- ifp->cstamp = jiffies;
- ifp->tokenized = tokenized;
- spin_unlock_bh(&ifp->lock);
- addrconf_dad_start(ifp);
- }
-
- if (ifp) {
- u32 flags;
- unsigned long now;
- u32 stored_lft;
-
- /* update lifetime (RFC2462 5.5.3 e) */
- spin_lock_bh(&ifp->lock);
- now = jiffies;
- if (ifp->valid_lft > (now - ifp->tstamp) / HZ)
- stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
- else
- stored_lft = 0;
- if (!update_lft && !create && stored_lft) {
- const u32 minimum_lft = min_t(u32,
- stored_lft, MIN_VALID_LIFETIME);
- valid_lft = max(valid_lft, minimum_lft);
-
- /* RFC4862 Section 5.5.3e:
- * "Note that the preferred lifetime of the
- * corresponding address is always reset to
- * the Preferred Lifetime in the received
- * Prefix Information option, regardless of
- * whether the valid lifetime is also reset or
- * ignored."
- *
- * So we should always update prefered_lft here.
- */
- update_lft = 1;
- }
-
- if (update_lft) {
- ifp->valid_lft = valid_lft;
- ifp->prefered_lft = prefered_lft;
- ifp->tstamp = now;
- flags = ifp->flags;
- ifp->flags &= ~IFA_F_DEPRECATED;
- spin_unlock_bh(&ifp->lock);
-
- if (!(flags&IFA_F_TENTATIVE))
- ipv6_ifa_notify(0, ifp);
- } else
- spin_unlock_bh(&ifp->lock);
-
- manage_tempaddrs(in6_dev, ifp, valid_lft, prefered_lft,
- create, now);
-
- in6_ifa_put(ifp);
- addrconf_verify();
- }
+ /* Ignore error case here because previous prefix add addr was
+ * successful which will be notified.
+ */
+ ndisc_ops_prefix_rcv_add_addr(net, dev, pinfo, in6_dev, &addr,
+ addr_type, addr_flags, sllao,
+ tokenized, valid_lft,
+ prefered_lft,
+ dev_addr_generated);
}
inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo);
+put:
in6_dev_put(in6_dev);
}
@@ -2947,8 +2972,8 @@
}
}
-static void addrconf_add_linklocal(struct inet6_dev *idev,
- const struct in6_addr *addr, u32 flags)
+void addrconf_add_linklocal(struct inet6_dev *idev,
+ const struct in6_addr *addr, u32 flags)
{
struct inet6_ifaddr *ifp;
u32 addr_flags = flags | IFA_F_PERMANENT;
@@ -2967,6 +2992,7 @@
in6_ifa_put(ifp);
}
}
+EXPORT_SYMBOL_GPL(addrconf_add_linklocal);
static bool ipv6_reserved_interfaceid(struct in6_addr address)
{
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 40454bf..e32a72f 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -587,7 +587,7 @@
fl6.daddr = ipv6_hdr(skb)->saddr;
if (saddr)
fl6.saddr = *saddr;
- fl6.flowi6_oif = l3mdev_fib_oif(skb->dev);
+ fl6.flowi6_oif = skb->dev->ifindex;
fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
fl6.flowi6_mark = mark;
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c
index b3d00be..ec9efbc 100644
--- a/net/ipv6/ila/ila_common.c
+++ b/net/ipv6/ila/ila_common.c
@@ -34,12 +34,12 @@
if (p->locator_match.v64) {
diff = p->csum_diff;
} else {
- diff = compute_csum_diff8((__be32 *)iaddr,
- (__be32 *)&p->locator);
+ diff = compute_csum_diff8((__be32 *)&p->locator,
+ (__be32 *)iaddr);
}
fval = (__force __wsum)(ila_csum_neutral_set(iaddr->ident) ?
- ~CSUM_NEUTRAL_FLAG : CSUM_NEUTRAL_FLAG);
+ CSUM_NEUTRAL_FLAG : ~CSUM_NEUTRAL_FLAG);
diff = csum_add(diff, fval);
@@ -140,8 +140,8 @@
return;
p->csum_diff = compute_csum_diff8(
- (__be32 *)&p->locator_match,
- (__be32 *)&p->locator);
+ (__be32 *)&p->locator,
+ (__be32 *)&p->locator_match);
}
static int __init ila_init(void)
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index c245895..fe65cdc 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -73,15 +73,6 @@
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
-/* Set to 3 to get tracing... */
-#define ND_DEBUG 1
-
-#define ND_PRINTK(val, level, fmt, ...) \
-do { \
- if (val <= ND_DEBUG) \
- net_##level##_ratelimited(fmt, ##__VA_ARGS__); \
-} while (0)
-
static u32 ndisc_hash(const void *pkey,
const struct net_device *dev,
__u32 *hash_rnd);
@@ -150,11 +141,10 @@
};
EXPORT_SYMBOL_GPL(nd_tbl);
-static void ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data)
+void __ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data,
+ int data_len, int pad)
{
- int pad = ndisc_addr_option_pad(skb->dev->type);
- int data_len = skb->dev->addr_len;
- int space = ndisc_opt_addr_space(skb->dev);
+ int space = __ndisc_opt_addr_space(data_len, pad);
u8 *opt = skb_put(skb, space);
opt[0] = type;
@@ -171,6 +161,23 @@
if (space > 0)
memset(opt, 0, space);
}
+EXPORT_SYMBOL_GPL(__ndisc_fill_addr_option);
+
+static inline void ndisc_fill_addr_option(struct sk_buff *skb, int type,
+ void *data, u8 icmp6_type)
+{
+ __ndisc_fill_addr_option(skb, type, data, skb->dev->addr_len,
+ ndisc_addr_option_pad(skb->dev->type));
+ ndisc_ops_fill_addr_option(skb->dev, skb, icmp6_type);
+}
+
+static inline void ndisc_fill_redirect_addr_option(struct sk_buff *skb,
+ void *ha,
+ const u8 *ops_data)
+{
+ ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR, ha, NDISC_REDIRECT);
+ ndisc_ops_fill_redirect_addr_option(skb->dev, skb, ops_data);
+}
static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
struct nd_opt_hdr *end)
@@ -185,24 +192,28 @@
return cur <= end && cur->nd_opt_type == type ? cur : NULL;
}
-static inline int ndisc_is_useropt(struct nd_opt_hdr *opt)
+static inline int ndisc_is_useropt(const struct net_device *dev,
+ struct nd_opt_hdr *opt)
{
return opt->nd_opt_type == ND_OPT_RDNSS ||
- opt->nd_opt_type == ND_OPT_DNSSL;
+ opt->nd_opt_type == ND_OPT_DNSSL ||
+ ndisc_ops_is_useropt(dev, opt->nd_opt_type);
}
-static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur,
+static struct nd_opt_hdr *ndisc_next_useropt(const struct net_device *dev,
+ struct nd_opt_hdr *cur,
struct nd_opt_hdr *end)
{
if (!cur || !end || cur >= end)
return NULL;
do {
cur = ((void *)cur) + (cur->nd_opt_len << 3);
- } while (cur < end && !ndisc_is_useropt(cur));
- return cur <= end && ndisc_is_useropt(cur) ? cur : NULL;
+ } while (cur < end && !ndisc_is_useropt(dev, cur));
+ return cur <= end && ndisc_is_useropt(dev, cur) ? cur : NULL;
}
-struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
+struct ndisc_options *ndisc_parse_options(const struct net_device *dev,
+ u8 *opt, int opt_len,
struct ndisc_options *ndopts)
{
struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt;
@@ -217,6 +228,8 @@
l = nd_opt->nd_opt_len << 3;
if (opt_len < l || l == 0)
return NULL;
+ if (ndisc_ops_parse_options(dev, nd_opt, ndopts))
+ goto next_opt;
switch (nd_opt->nd_opt_type) {
case ND_OPT_SOURCE_LL_ADDR:
case ND_OPT_TARGET_LL_ADDR:
@@ -243,7 +256,7 @@
break;
#endif
default:
- if (ndisc_is_useropt(nd_opt)) {
+ if (ndisc_is_useropt(dev, nd_opt)) {
ndopts->nd_useropts_end = nd_opt;
if (!ndopts->nd_useropts)
ndopts->nd_useropts = nd_opt;
@@ -260,6 +273,7 @@
nd_opt->nd_opt_len);
}
}
+next_opt:
opt_len -= l;
nd_opt = ((void *)nd_opt) + l;
}
@@ -509,7 +523,8 @@
if (!dev->addr_len)
inc_opt = 0;
if (inc_opt)
- optlen += ndisc_opt_addr_space(dev);
+ optlen += ndisc_opt_addr_space(dev,
+ NDISC_NEIGHBOUR_ADVERTISEMENT);
skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
if (!skb)
@@ -528,8 +543,8 @@
if (inc_opt)
ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR,
- dev->dev_addr);
-
+ dev->dev_addr,
+ NDISC_NEIGHBOUR_ADVERTISEMENT);
ndisc_send_skb(skb, daddr, src_addr);
}
@@ -574,7 +589,8 @@
if (ipv6_addr_any(saddr))
inc_opt = false;
if (inc_opt)
- optlen += ndisc_opt_addr_space(dev);
+ optlen += ndisc_opt_addr_space(dev,
+ NDISC_NEIGHBOUR_SOLICITATION);
skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
if (!skb)
@@ -590,7 +606,8 @@
if (inc_opt)
ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
- dev->dev_addr);
+ dev->dev_addr,
+ NDISC_NEIGHBOUR_SOLICITATION);
ndisc_send_skb(skb, daddr, saddr);
}
@@ -626,7 +643,7 @@
}
#endif
if (send_sllao)
- optlen += ndisc_opt_addr_space(dev);
+ optlen += ndisc_opt_addr_space(dev, NDISC_ROUTER_SOLICITATION);
skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
if (!skb)
@@ -641,7 +658,8 @@
if (send_sllao)
ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
- dev->dev_addr);
+ dev->dev_addr,
+ NDISC_ROUTER_SOLICITATION);
ndisc_send_skb(skb, daddr, saddr);
}
@@ -702,6 +720,15 @@
return ret;
}
+void ndisc_update(const struct net_device *dev, struct neighbour *neigh,
+ const u8 *lladdr, u8 new, u32 flags, u8 icmp6_type,
+ struct ndisc_options *ndopts)
+{
+ neigh_update(neigh, lladdr, new, flags);
+ /* report ndisc ops about neighbour update */
+ ndisc_ops_update(dev, neigh, flags, icmp6_type, ndopts);
+}
+
static void ndisc_recv_ns(struct sk_buff *skb)
{
struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
@@ -738,7 +765,7 @@
return;
}
- if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
+ if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts)) {
ND_PRINTK(2, warn, "NS: invalid ND options\n");
return;
}
@@ -856,9 +883,10 @@
neigh = __neigh_lookup(&nd_tbl, saddr, dev,
!inc || lladdr || !dev->addr_len);
if (neigh)
- neigh_update(neigh, lladdr, NUD_STALE,
+ ndisc_update(dev, neigh, lladdr, NUD_STALE,
NEIGH_UPDATE_F_WEAK_OVERRIDE|
- NEIGH_UPDATE_F_OVERRIDE);
+ NEIGH_UPDATE_F_OVERRIDE,
+ NDISC_NEIGHBOUR_SOLICITATION, &ndopts);
if (neigh || !dev->header_ops) {
ndisc_send_na(dev, saddr, &msg->target, !!is_router,
true, (ifp != NULL && inc), inc);
@@ -911,7 +939,7 @@
idev->cnf.drop_unsolicited_na)
return;
- if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
+ if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts)) {
ND_PRINTK(2, warn, "NS: invalid ND option\n");
return;
}
@@ -967,12 +995,13 @@
goto out;
}
- neigh_update(neigh, lladdr,
+ ndisc_update(dev, neigh, lladdr,
msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE,
NEIGH_UPDATE_F_WEAK_OVERRIDE|
(msg->icmph.icmp6_override ? NEIGH_UPDATE_F_OVERRIDE : 0)|
NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
- (msg->icmph.icmp6_router ? NEIGH_UPDATE_F_ISROUTER : 0));
+ (msg->icmph.icmp6_router ? NEIGH_UPDATE_F_ISROUTER : 0),
+ NDISC_NEIGHBOUR_ADVERTISEMENT, &ndopts);
if ((old_flags & ~neigh->flags) & NTF_ROUTER) {
/*
@@ -1017,7 +1046,7 @@
goto out;
/* Parse ND options */
- if (!ndisc_parse_options(rs_msg->opt, ndoptlen, &ndopts)) {
+ if (!ndisc_parse_options(skb->dev, rs_msg->opt, ndoptlen, &ndopts)) {
ND_PRINTK(2, notice, "NS: invalid ND option, ignored\n");
goto out;
}
@@ -1031,10 +1060,11 @@
neigh = __neigh_lookup(&nd_tbl, saddr, skb->dev, 1);
if (neigh) {
- neigh_update(neigh, lladdr, NUD_STALE,
+ ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
NEIGH_UPDATE_F_WEAK_OVERRIDE|
NEIGH_UPDATE_F_OVERRIDE|
- NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
+ NEIGH_UPDATE_F_OVERRIDE_ISROUTER,
+ NDISC_ROUTER_SOLICITATION, &ndopts);
neigh_release(neigh);
}
out:
@@ -1135,7 +1165,7 @@
return;
}
- if (!ndisc_parse_options(opt, optlen, &ndopts)) {
+ if (!ndisc_parse_options(skb->dev, opt, optlen, &ndopts)) {
ND_PRINTK(2, warn, "RA: invalid ND options\n");
return;
}
@@ -1329,11 +1359,12 @@
goto out;
}
}
- neigh_update(neigh, lladdr, NUD_STALE,
+ ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
NEIGH_UPDATE_F_WEAK_OVERRIDE|
NEIGH_UPDATE_F_OVERRIDE|
NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
- NEIGH_UPDATE_F_ISROUTER);
+ NEIGH_UPDATE_F_ISROUTER,
+ NDISC_ROUTER_ADVERTISEMENT, &ndopts);
}
if (!ipv6_accept_ra(in6_dev)) {
@@ -1421,7 +1452,8 @@
struct nd_opt_hdr *p;
for (p = ndopts.nd_useropts;
p;
- p = ndisc_next_useropt(p, ndopts.nd_useropts_end)) {
+ p = ndisc_next_useropt(skb->dev, p,
+ ndopts.nd_useropts_end)) {
ndisc_ra_useropt(skb, p);
}
}
@@ -1459,7 +1491,7 @@
return;
}
- if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts))
+ if (!ndisc_parse_options(skb->dev, msg->opt, ndoptlen, &ndopts))
return;
if (!ndopts.nd_opts_rh) {
@@ -1504,7 +1536,8 @@
struct dst_entry *dst;
struct flowi6 fl6;
int rd_len;
- u8 ha_buf[MAX_ADDR_LEN], *ha = NULL;
+ u8 ha_buf[MAX_ADDR_LEN], *ha = NULL,
+ ops_data_buf[NDISC_OPS_REDIRECT_DATA_SPACE], *ops_data = NULL;
int oif = l3mdev_fib_oif(dev);
bool ret;
@@ -1563,7 +1596,9 @@
memcpy(ha_buf, neigh->ha, dev->addr_len);
read_unlock_bh(&neigh->lock);
ha = ha_buf;
- optlen += ndisc_opt_addr_space(dev);
+ optlen += ndisc_redirect_opt_addr_space(dev, neigh,
+ ops_data_buf,
+ &ops_data);
} else
read_unlock_bh(&neigh->lock);
@@ -1594,7 +1629,7 @@
*/
if (ha)
- ndisc_fill_addr_option(buff, ND_OPT_TARGET_LL_ADDR, ha);
+ ndisc_fill_redirect_addr_option(buff, ha, ops_data);
/*
* build redirect option and copy skb over to the new packet.
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c6ae6f9..9e15167 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1042,8 +1042,8 @@
return pcpu_rt;
}
-static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
- struct flowi6 *fl6, int flags)
+struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
+ int oif, struct flowi6 *fl6, int flags)
{
struct fib6_node *fn, *saved_fn;
struct rt6_info *rt;
@@ -1139,6 +1139,7 @@
}
}
+EXPORT_SYMBOL_GPL(ip6_pol_route);
static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
struct flowi6 *fl6, int flags)
@@ -2200,7 +2201,7 @@
* first-hop router for the specified ICMP Destination Address.
*/
- if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
+ if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
return;
}
@@ -2235,12 +2236,12 @@
* We have finally decided to accept it.
*/
- neigh_update(neigh, lladdr, NUD_STALE,
+ ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
NEIGH_UPDATE_F_WEAK_OVERRIDE|
NEIGH_UPDATE_F_OVERRIDE|
(on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
- NEIGH_UPDATE_F_ISROUTER))
- );
+ NEIGH_UPDATE_F_ISROUTER)),
+ NDISC_REDIRECT, &ndopts);
nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
if (!nrt)
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index fc3598a..37d674e 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1033,6 +1033,7 @@
{
struct sock *sk = sock->sk;
struct iucv_sock *iucv = iucv_sk(sk);
+ size_t headroom, linear;
struct sk_buff *skb;
struct iucv_message txmsg = {0};
struct cmsghdr *cmsg;
@@ -1110,20 +1111,31 @@
* this is fine for SOCK_SEQPACKET (unless we want to support
* segmented records using the MSG_EOR flag), but
* for SOCK_STREAM we might want to improve it in future */
- if (iucv->transport == AF_IUCV_TRANS_HIPER)
- skb = sock_alloc_send_skb(sk,
- len + sizeof(struct af_iucv_trans_hdr) + ETH_HLEN,
- noblock, &err);
- else
- skb = sock_alloc_send_skb(sk, len, noblock, &err);
+ headroom = (iucv->transport == AF_IUCV_TRANS_HIPER)
+ ? sizeof(struct af_iucv_trans_hdr) + ETH_HLEN : 0;
+ if (headroom + len < PAGE_SIZE) {
+ linear = len;
+ } else {
+ /* In nonlinear "classic" iucv skb,
+ * reserve space for iucv_array
+ */
+ if (iucv->transport != AF_IUCV_TRANS_HIPER)
+ headroom += sizeof(struct iucv_array) *
+ (MAX_SKB_FRAGS + 1);
+ linear = PAGE_SIZE - headroom;
+ }
+ skb = sock_alloc_send_pskb(sk, headroom + linear, len - linear,
+ noblock, &err, 0);
if (!skb)
goto out;
- if (iucv->transport == AF_IUCV_TRANS_HIPER)
- skb_reserve(skb, sizeof(struct af_iucv_trans_hdr) + ETH_HLEN);
- if (memcpy_from_msg(skb_put(skb, len), msg, len)) {
- err = -EFAULT;
+ if (headroom)
+ skb_reserve(skb, headroom);
+ skb_put(skb, linear);
+ skb->len = len;
+ skb->data_len = len - linear;
+ err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
+ if (err)
goto fail;
- }
/* wait if outstanding messages for iucv path has reached */
timeo = sock_sndtimeo(sk, noblock);
@@ -1148,49 +1160,67 @@
atomic_dec(&iucv->msg_sent);
goto fail;
}
- goto release;
- }
- skb_queue_tail(&iucv->send_skb_q, skb);
+ } else { /* Classic VM IUCV transport */
+ skb_queue_tail(&iucv->send_skb_q, skb);
- if (((iucv->path->flags & IUCV_IPRMDATA) & iucv->flags)
- && skb->len <= 7) {
- err = iucv_send_iprm(iucv->path, &txmsg, skb);
+ if (((iucv->path->flags & IUCV_IPRMDATA) & iucv->flags) &&
+ skb->len <= 7) {
+ err = iucv_send_iprm(iucv->path, &txmsg, skb);
- /* on success: there is no message_complete callback
- * for an IPRMDATA msg; remove skb from send queue */
- if (err == 0) {
- skb_unlink(skb, &iucv->send_skb_q);
- kfree_skb(skb);
+ /* on success: there is no message_complete callback */
+ /* for an IPRMDATA msg; remove skb from send queue */
+ if (err == 0) {
+ skb_unlink(skb, &iucv->send_skb_q);
+ kfree_skb(skb);
+ }
+
+ /* this error should never happen since the */
+ /* IUCV_IPRMDATA path flag is set... sever path */
+ if (err == 0x15) {
+ pr_iucv->path_sever(iucv->path, NULL);
+ skb_unlink(skb, &iucv->send_skb_q);
+ err = -EPIPE;
+ goto fail;
+ }
+ } else if (skb_is_nonlinear(skb)) {
+ struct iucv_array *iba = (struct iucv_array *)skb->head;
+ int i;
+
+ /* skip iucv_array lying in the headroom */
+ iba[0].address = (u32)(addr_t)skb->data;
+ iba[0].length = (u32)skb_headlen(skb);
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ iba[i + 1].address =
+ (u32)(addr_t)skb_frag_address(frag);
+ iba[i + 1].length = (u32)skb_frag_size(frag);
+ }
+ err = pr_iucv->message_send(iucv->path, &txmsg,
+ IUCV_IPBUFLST, 0,
+ (void *)iba, skb->len);
+ } else { /* non-IPRM Linear skb */
+ err = pr_iucv->message_send(iucv->path, &txmsg,
+ 0, 0, (void *)skb->data, skb->len);
}
-
- /* this error should never happen since the
- * IUCV_IPRMDATA path flag is set... sever path */
- if (err == 0x15) {
- pr_iucv->path_sever(iucv->path, NULL);
+ if (err) {
+ if (err == 3) {
+ user_id[8] = 0;
+ memcpy(user_id, iucv->dst_user_id, 8);
+ appl_id[8] = 0;
+ memcpy(appl_id, iucv->dst_name, 8);
+ pr_err(
+ "Application %s on z/VM guest %s exceeds message limit\n",
+ appl_id, user_id);
+ err = -EAGAIN;
+ } else {
+ err = -EPIPE;
+ }
skb_unlink(skb, &iucv->send_skb_q);
- err = -EPIPE;
goto fail;
}
- } else
- err = pr_iucv->message_send(iucv->path, &txmsg, 0, 0,
- (void *) skb->data, skb->len);
- if (err) {
- if (err == 3) {
- user_id[8] = 0;
- memcpy(user_id, iucv->dst_user_id, 8);
- appl_id[8] = 0;
- memcpy(appl_id, iucv->dst_name, 8);
- pr_err("Application %s on z/VM guest %s"
- " exceeds message limit\n",
- appl_id, user_id);
- err = -EAGAIN;
- } else
- err = -EPIPE;
- skb_unlink(skb, &iucv->send_skb_q);
- goto fail;
}
-release:
release_sock(sk);
return len;
@@ -1201,42 +1231,32 @@
return err;
}
-/* iucv_fragment_skb() - Fragment a single IUCV message into multiple skb's
- *
- * Locking: must be called with message_q.lock held
- */
-static int iucv_fragment_skb(struct sock *sk, struct sk_buff *skb, int len)
+static struct sk_buff *alloc_iucv_recv_skb(unsigned long len)
{
- int dataleft, size, copied = 0;
- struct sk_buff *nskb;
+ size_t headroom, linear;
+ struct sk_buff *skb;
+ int err;
- dataleft = len;
- while (dataleft) {
- if (dataleft >= sk->sk_rcvbuf / 4)
- size = sk->sk_rcvbuf / 4;
- else
- size = dataleft;
-
- nskb = alloc_skb(size, GFP_ATOMIC | GFP_DMA);
- if (!nskb)
- return -ENOMEM;
-
- /* copy target class to control buffer of new skb */
- IUCV_SKB_CB(nskb)->class = IUCV_SKB_CB(skb)->class;
-
- /* copy data fragment */
- memcpy(nskb->data, skb->data + copied, size);
- copied += size;
- dataleft -= size;
-
- skb_reset_transport_header(nskb);
- skb_reset_network_header(nskb);
- nskb->len = size;
-
- skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, nskb);
+ if (len < PAGE_SIZE) {
+ headroom = 0;
+ linear = len;
+ } else {
+ headroom = sizeof(struct iucv_array) * (MAX_SKB_FRAGS + 1);
+ linear = PAGE_SIZE - headroom;
}
-
- return 0;
+ skb = alloc_skb_with_frags(headroom + linear, len - linear,
+ 0, &err, GFP_ATOMIC | GFP_DMA);
+ WARN_ONCE(!skb,
+ "alloc of recv iucv skb len=%lu failed with errcode=%d\n",
+ len, err);
+ if (skb) {
+ if (headroom)
+ skb_reserve(skb, headroom);
+ skb_put(skb, linear);
+ skb->len = len;
+ skb->data_len = len - linear;
+ }
+ return skb;
}
/* iucv_process_message() - Receive a single outstanding IUCV message
@@ -1263,31 +1283,32 @@
skb->len = 0;
}
} else {
- rc = pr_iucv->message_receive(path, msg,
+ if (skb_is_nonlinear(skb)) {
+ struct iucv_array *iba = (struct iucv_array *)skb->head;
+ int i;
+
+ iba[0].address = (u32)(addr_t)skb->data;
+ iba[0].length = (u32)skb_headlen(skb);
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ iba[i + 1].address =
+ (u32)(addr_t)skb_frag_address(frag);
+ iba[i + 1].length = (u32)skb_frag_size(frag);
+ }
+ rc = pr_iucv->message_receive(path, msg,
+ IUCV_IPBUFLST,
+ (void *)iba, len, NULL);
+ } else {
+ rc = pr_iucv->message_receive(path, msg,
msg->flags & IUCV_IPRMDATA,
skb->data, len, NULL);
+ }
if (rc) {
kfree_skb(skb);
return;
}
- /* we need to fragment iucv messages for SOCK_STREAM only;
- * for SOCK_SEQPACKET, it is only relevant if we support
- * record segmentation using MSG_EOR (see also recvmsg()) */
- if (sk->sk_type == SOCK_STREAM &&
- skb->truesize >= sk->sk_rcvbuf / 4) {
- rc = iucv_fragment_skb(sk, skb, len);
- kfree_skb(skb);
- skb = NULL;
- if (rc) {
- pr_iucv->path_sever(path, NULL);
- return;
- }
- skb = skb_dequeue(&iucv_sk(sk)->backlog_skb_q);
- } else {
- skb_reset_transport_header(skb);
- skb_reset_network_header(skb);
- skb->len = len;
- }
+ WARN_ON_ONCE(skb->len != len);
}
IUCV_SKB_CB(skb)->offset = 0;
@@ -1306,7 +1327,7 @@
struct sock_msg_q *p, *n;
list_for_each_entry_safe(p, n, &iucv->message_q.list, list) {
- skb = alloc_skb(iucv_msg_length(&p->msg), GFP_ATOMIC | GFP_DMA);
+ skb = alloc_iucv_recv_skb(iucv_msg_length(&p->msg));
if (!skb)
break;
iucv_process_message(sk, skb, p->path, &p->msg);
@@ -1801,7 +1822,7 @@
if (len > sk->sk_rcvbuf)
goto save_message;
- skb = alloc_skb(iucv_msg_length(msg), GFP_ATOMIC | GFP_DMA);
+ skb = alloc_iucv_recv_skb(iucv_msg_length(msg));
if (!skb)
goto save_message;
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index 7da9780..d90e4ef 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -108,7 +108,7 @@
*/
struct dst_entry *l3mdev_get_rt6_dst(struct net *net,
- const struct flowi6 *fl6)
+ struct flowi6 *fl6)
{
struct dst_entry *dst = NULL;
struct net_device *dev;
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index b6db56e..f8c61d2 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -224,8 +224,8 @@
}
EXPORT_SYMBOL(tcf_hash_search);
-int tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action *a,
- int bind)
+bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action *a,
+ int bind)
{
struct tcf_hashinfo *hinfo = tn->hinfo;
struct tcf_common *p = NULL;
@@ -235,9 +235,9 @@
p->tcfc_refcnt++;
a->priv = p;
a->hinfo = hinfo;
- return 1;
+ return true;
}
- return 0;
+ return false;
}
EXPORT_SYMBOL(tcf_hash_check);
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 02f5a8b..b7fa969 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -423,7 +423,8 @@
u16 ife_type = 0;
u8 *daddr = NULL;
u8 *saddr = NULL;
- int ret = 0, exists = 0;
+ bool exists = false;
+ int ret = 0;
int err;
err = nla_parse_nested(tb, TCA_IFE_MAX, nla, ife_policy);
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 8998a35..6148e32 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -97,7 +97,8 @@
struct tcf_ipt *ipt;
struct xt_entry_target *td, *t;
char *tname;
- int ret = 0, err, exists = 0;
+ bool exists = false;
+ int ret = 0, err;
u32 hook = 0;
u32 index = 0;
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 787751a..5b135d3 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -62,7 +62,8 @@
struct tc_mirred *parm;
struct tcf_mirred *m;
struct net_device *dev;
- int ret, ok_push = 0, exists = 0;
+ int ret, ok_push = 0;
+ bool exists = false;
if (nla == NULL)
return -EINVAL;
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index be5fbb5..318328d3 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -86,8 +86,9 @@
struct nlattr *tb[TCA_DEF_MAX + 1];
struct tc_defact *parm;
struct tcf_defact *d;
+ bool exists = false;
+ int ret = 0, err;
char *defdata;
- int ret = 0, err, exists = 0;
if (nla == NULL)
return -EINVAL;
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 7e2bc3c..53d1486 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -69,7 +69,8 @@
struct tcf_skbedit *d;
u32 flags = 0, *priority = NULL, *mark = NULL;
u16 *queue_mapping = NULL;
- int ret = 0, err, exists = 0;
+ bool exists = false;
+ int ret = 0, err;
if (nla == NULL)
return -EINVAL;
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index b075d50..db9b7ed 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -77,8 +77,8 @@
int action;
__be16 push_vid = 0;
__be16 push_proto = 0;
- int ret = 0, exists = 0;
- int err;
+ bool exists = false;
+ int ret = 0, err;
if (!nla)
return -EINVAL;
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 04e0b05..789b69e 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -375,11 +375,11 @@
q->head = (q->head + 1) & q->tab_mask;
if (!skb)
continue;
- qdisc_qstats_backlog_dec(sch, skb);
- --sch->q.qlen;
- qdisc_drop(skb, sch);
+ rtnl_qdisc_drop(skb, sch);
}
+ sch->q.qlen = 0;
+ sch->qstats.backlog = 0;
memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
q->head = q->tail = 0;
red_restart(&q->vars);
@@ -455,7 +455,7 @@
dropped += qdisc_pkt_len(skb);
qdisc_qstats_backlog_dec(sch, skb);
--sch->q.qlen;
- qdisc_drop(skb, sch);
+ rtnl_qdisc_drop(skb, sch);
}
qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped);
q->head = 0;
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index dddf3bb..c5bc424 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -174,7 +174,7 @@
dropped += qdisc_pkt_len(skb);
qdisc_qstats_backlog_dec(sch, skb);
- qdisc_drop(skb, sch);
+ rtnl_qdisc_drop(skb, sch);
}
qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index f49c81e..6eb0667 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -514,17 +514,25 @@
return skb;
}
+static void fq_flow_purge(struct fq_flow *flow)
+{
+ rtnl_kfree_skbs(flow->head, flow->tail);
+ flow->head = NULL;
+ flow->qlen = 0;
+}
+
static void fq_reset(struct Qdisc *sch)
{
struct fq_sched_data *q = qdisc_priv(sch);
struct rb_root *root;
- struct sk_buff *skb;
struct rb_node *p;
struct fq_flow *f;
unsigned int idx;
- while ((skb = fq_dequeue_head(sch, &q->internal)) != NULL)
- kfree_skb(skb);
+ sch->q.qlen = 0;
+ sch->qstats.backlog = 0;
+
+ fq_flow_purge(&q->internal);
if (!q->fq_root)
return;
@@ -535,8 +543,7 @@
f = container_of(p, struct fq_flow, fq_node);
rb_erase(p, root);
- while ((skb = fq_dequeue_head(sch, f)) != NULL)
- kfree_skb(skb);
+ fq_flow_purge(f);
kmem_cache_free(fq_flow_cachep, f);
}
@@ -737,7 +744,7 @@
if (!skb)
break;
drop_len += qdisc_pkt_len(skb);
- kfree_skb(skb);
+ rtnl_kfree_skbs(skb, skb);
drop_count++;
}
qdisc_tree_reduce_backlog(sch, drop_count, drop_len);
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index a302e8e..2dc0a84 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -336,6 +336,12 @@
return skb;
}
+static void fq_codel_flow_purge(struct fq_codel_flow *flow)
+{
+ rtnl_kfree_skbs(flow->head, flow->tail);
+ flow->head = NULL;
+}
+
static void fq_codel_reset(struct Qdisc *sch)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
@@ -346,18 +352,13 @@
for (i = 0; i < q->flows_cnt; i++) {
struct fq_codel_flow *flow = q->flows + i;
- while (flow->head) {
- struct sk_buff *skb = dequeue_head(flow);
-
- qdisc_qstats_backlog_dec(sch, skb);
- kfree_skb(skb);
- }
-
+ fq_codel_flow_purge(flow);
INIT_LIST_HEAD(&flow->flowchain);
codel_vars_init(&flow->cvars);
}
memset(q->backlogs, 0, q->flows_cnt * sizeof(u32));
sch->q.qlen = 0;
+ sch->qstats.backlog = 0;
q->memory_usage = 0;
}
@@ -433,7 +434,7 @@
struct sk_buff *skb = fq_codel_dequeue(sch);
q->cstats.drop_len += qdisc_pkt_len(skb);
- kfree_skb(skb);
+ rtnl_kfree_skbs(skb, skb);
q->cstats.drop_count++;
}
qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, q->cstats.drop_len);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 0c9cb51..773b632 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -493,7 +493,7 @@
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
- __qdisc_reset_queue(qdisc, band2list(priv, prio));
+ __qdisc_reset_queue(band2list(priv, prio));
priv->bitmap = 0;
qdisc->qstats.backlog = 0;
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index c517918..c44593b 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -464,7 +464,7 @@
struct sk_buff *skb;
while ((skb = hhf_dequeue(sch)) != NULL)
- kfree_skb(skb);
+ rtnl_kfree_skbs(skb, skb);
}
static void *hhf_zalloc(size_t sz)
@@ -574,7 +574,7 @@
while (sch->q.qlen > sch->limit) {
struct sk_buff *skb = hhf_dequeue(sch);
- kfree_skb(skb);
+ rtnl_kfree_skbs(skb, skb);
}
qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen,
prev_backlog - sch->qstats.backlog);
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 07dcd29..a454605 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -957,7 +957,7 @@
}
}
qdisc_watchdog_cancel(&q->watchdog);
- __skb_queue_purge(&q->direct_queue);
+ __qdisc_reset_queue(&q->direct_queue);
sch->q.qlen = 0;
sch->qstats.backlog = 0;
memset(q->hlevel, 0, sizeof(q->hlevel));
@@ -1231,7 +1231,7 @@
htb_destroy_class(sch, cl);
}
qdisc_class_hash_destroy(&q->clhash);
- __skb_queue_purge(&q->direct_queue);
+ __qdisc_reset_queue(&q->direct_queue);
}
static int htb_delete(struct Qdisc *sch, unsigned long arg)
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 876df13..e271967 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -368,9 +368,7 @@
struct sk_buff *skb = netem_rb_to_skb(p);
rb_erase(p, &q->t_root);
- skb->next = NULL;
- skb->prev = NULL;
- kfree_skb(skb);
+ rtnl_kfree_skbs(skb, skb);
}
}
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index 71ae3b9..912a46a 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -234,7 +234,7 @@
dropped += qdisc_pkt_len(skb);
qdisc_qstats_backlog_dec(sch, skb);
- qdisc_drop(skb, sch);
+ rtnl_qdisc_drop(skb, sch);
}
qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index a2e0b85..57d118b 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -520,7 +520,7 @@
struct sk_buff *skb;
while ((skb = sfq_dequeue(sch)) != NULL)
- kfree_skb(skb);
+ rtnl_kfree_skbs(skb, skb);
}
/*
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index 57e460b..31b9f9c 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -6,7 +6,7 @@
tipc-y += addr.o bcast.o bearer.o \
core.o link.o discover.o msg.o \
- name_distr.o subscr.o name_table.o net.o \
+ name_distr.o subscr.o monitor.o name_table.o net.o \
netlink.o netlink_compat.o node.o socket.o eth_media.o \
server.o socket.o
diff --git a/net/tipc/addr.h b/net/tipc/addr.h
index 93f7c98..64f4004 100644
--- a/net/tipc/addr.h
+++ b/net/tipc/addr.h
@@ -73,4 +73,5 @@
int tipc_in_scope(u32 domain, u32 addr);
int tipc_addr_scope(u32 domain);
char *tipc_addr_string_fill(char *string, u32 addr);
+
#endif
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 6f11c62..9a70e1d 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -1,7 +1,7 @@
/*
* net/tipc/bearer.c: TIPC bearer code
*
- * Copyright (c) 1996-2006, 2013-2014, Ericsson AB
+ * Copyright (c) 1996-2006, 2013-2016, Ericsson AB
* Copyright (c) 2004-2006, 2010-2013, Wind River Systems
* All rights reserved.
*
@@ -39,6 +39,7 @@
#include "bearer.h"
#include "link.h"
#include "discover.h"
+#include "monitor.h"
#include "bcast.h"
#include "netlink.h"
@@ -313,6 +314,10 @@
rcu_assign_pointer(tn->bearer_list[bearer_id], b);
if (skb)
tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr);
+
+ if (tipc_mon_create(net, bearer_id))
+ return -ENOMEM;
+
pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n",
name,
tipc_addr_string_fill(addr_string, disc_domain), priority);
@@ -348,6 +353,7 @@
tipc_disc_delete(b->link_req);
RCU_INIT_POINTER(tn->bearer_list[bearer_id], NULL);
kfree_rcu(b, rcu);
+ tipc_mon_delete(net, bearer_id);
}
int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index f686e41..0d337c7 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -1,7 +1,7 @@
/*
* net/tipc/bearer.h: Include file for TIPC bearer code
*
- * Copyright (c) 1996-2006, 2013-2014, Ericsson AB
+ * Copyright (c) 1996-2006, 2013-2016, Ericsson AB
* Copyright (c) 2005, 2010-2011, Wind River Systems
* All rights reserved.
*
diff --git a/net/tipc/core.c b/net/tipc/core.c
index fe1b062..236b043 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -57,6 +57,7 @@
tn->net_id = 4711;
tn->own_addr = 0;
+ tn->mon_threshold = TIPC_DEF_MON_THRESHOLD;
get_random_bytes(&tn->random, sizeof(int));
INIT_LIST_HEAD(&tn->node_list);
spin_lock_init(&tn->node_list_lock);
diff --git a/net/tipc/core.h b/net/tipc/core.h
index eff58dc..a1845fb 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -66,11 +66,13 @@
struct tipc_link;
struct tipc_name_table;
struct tipc_server;
+struct tipc_monitor;
#define TIPC_MOD_VER "2.0.0"
-#define NODE_HTABLE_SIZE 512
-#define MAX_BEARERS 3
+#define NODE_HTABLE_SIZE 512
+#define MAX_BEARERS 3
+#define TIPC_DEF_MON_THRESHOLD 32
extern int tipc_net_id __read_mostly;
extern int sysctl_tipc_rmem[3] __read_mostly;
@@ -88,6 +90,10 @@
u32 num_nodes;
u32 num_links;
+ /* Neighbor monitoring list */
+ struct tipc_monitor *monitors[MAX_BEARERS];
+ int mon_threshold;
+
/* Bearer list */
struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1];
@@ -126,6 +132,11 @@
return &tipc_net(net)->node_list;
}
+static inline unsigned int tipc_hashfn(u32 addr)
+{
+ return addr & (NODE_HTABLE_SIZE - 1);
+}
+
static inline u16 mod(u16 x)
{
return x & 0xffffu;
diff --git a/net/tipc/link.c b/net/tipc/link.c
index a904ccd..03f8bdf 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -42,6 +42,7 @@
#include "name_distr.h"
#include "discover.h"
#include "netlink.h"
+#include "monitor.h"
#include <linux/pkt_sched.h>
@@ -95,6 +96,7 @@
* @pmsg: convenience pointer to "proto_msg" field
* @priority: current link priority
* @net_plane: current link network plane ('A' through 'H')
+ * @mon_state: cookie with information needed by link monitor
* @backlog_limit: backlog queue congestion thresholds (indexed by importance)
* @exp_msg_count: # of tunnelled messages expected during link changeover
* @reset_rcv_checkpt: seq # of last acknowledged message at time of link reset
@@ -138,6 +140,7 @@
char if_name[TIPC_MAX_IF_NAME];
u32 priority;
char net_plane;
+ struct tipc_mon_state mon_state;
u16 rst_cnt;
/* Failover/synch */
@@ -708,18 +711,25 @@
bool setup = false;
u16 bc_snt = l->bc_sndlink->snd_nxt - 1;
u16 bc_acked = l->bc_rcvlink->acked;
-
- link_profile_stats(l);
+ struct tipc_mon_state *mstate = &l->mon_state;
switch (l->state) {
case LINK_ESTABLISHED:
case LINK_SYNCHING:
- if (l->silent_intv_cnt > l->abort_limit)
- return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
mtyp = STATE_MSG;
+ link_profile_stats(l);
+ tipc_mon_get_state(l->net, l->addr, mstate, l->bearer_id);
+ if (mstate->reset || (l->silent_intv_cnt > l->abort_limit))
+ return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
state = bc_acked != bc_snt;
- probe = l->silent_intv_cnt;
- l->silent_intv_cnt++;
+ state |= l->bc_rcvlink->rcv_unacked;
+ state |= l->rcv_unacked;
+ state |= !skb_queue_empty(&l->transmq);
+ state |= !skb_queue_empty(&l->deferdq);
+ probe = mstate->probing;
+ probe |= l->silent_intv_cnt;
+ if (probe || mstate->monitoring)
+ l->silent_intv_cnt++;
break;
case LINK_RESET:
setup = l->rst_cnt++ <= 4;
@@ -830,6 +840,7 @@
l->stats.recv_info = 0;
l->stale_count = 0;
l->bc_peer_is_up = false;
+ memset(&l->mon_state, 0, sizeof(l->mon_state));
tipc_link_reset_stats(l);
}
@@ -1238,6 +1249,9 @@
struct tipc_msg *hdr;
struct sk_buff_head *dfq = &l->deferdq;
bool node_up = link_is_up(l->bc_rcvlink);
+ struct tipc_mon_state *mstate = &l->mon_state;
+ int dlen = 0;
+ void *data;
/* Don't send protocol message during reset or link failover */
if (tipc_link_is_blocked(l))
@@ -1250,12 +1264,13 @@
rcvgap = buf_seqno(skb_peek(dfq)) - l->rcv_nxt;
skb = tipc_msg_create(LINK_PROTOCOL, mtyp, INT_H_SIZE,
- TIPC_MAX_IF_NAME, l->addr,
+ tipc_max_domain_size, l->addr,
tipc_own_addr(l->net), 0, 0, 0);
if (!skb)
return;
hdr = buf_msg(skb);
+ data = msg_data(hdr);
msg_set_session(hdr, l->session);
msg_set_bearer_id(hdr, l->bearer_id);
msg_set_net_plane(hdr, l->net_plane);
@@ -1271,14 +1286,18 @@
if (mtyp == STATE_MSG) {
msg_set_seq_gap(hdr, rcvgap);
- msg_set_size(hdr, INT_H_SIZE);
msg_set_probe(hdr, probe);
+ tipc_mon_prep(l->net, data, &dlen, mstate, l->bearer_id);
+ msg_set_size(hdr, INT_H_SIZE + dlen);
+ skb_trim(skb, INT_H_SIZE + dlen);
l->stats.sent_states++;
l->rcv_unacked = 0;
} else {
/* RESET_MSG or ACTIVATE_MSG */
msg_set_max_pkt(hdr, l->advertised_mtu);
- strcpy(msg_data(hdr), l->if_name);
+ strcpy(data, l->if_name);
+ msg_set_size(hdr, INT_H_SIZE + TIPC_MAX_IF_NAME);
+ skb_trim(skb, INT_H_SIZE + TIPC_MAX_IF_NAME);
}
if (probe)
l->stats.sent_probes++;
@@ -1371,7 +1390,9 @@
u16 peers_tol = msg_link_tolerance(hdr);
u16 peers_prio = msg_linkprio(hdr);
u16 rcv_nxt = l->rcv_nxt;
+ u16 dlen = msg_data_sz(hdr);
int mtyp = msg_type(hdr);
+ void *data;
char *if_name;
int rc = 0;
@@ -1381,6 +1402,10 @@
if (tipc_own_addr(l->net) > msg_prevnode(hdr))
l->net_plane = msg_net_plane(hdr);
+ skb_linearize(skb);
+ hdr = buf_msg(skb);
+ data = msg_data(hdr);
+
switch (mtyp) {
case RESET_MSG:
@@ -1391,8 +1416,6 @@
/* fall thru' */
case ACTIVATE_MSG:
- skb_linearize(skb);
- hdr = buf_msg(skb);
/* Complete own link name with peer's interface name */
if_name = strrchr(l->name, ':') + 1;
@@ -1400,7 +1423,7 @@
break;
if (msg_data_sz(hdr) < TIPC_MAX_IF_NAME)
break;
- strncpy(if_name, msg_data(hdr), TIPC_MAX_IF_NAME);
+ strncpy(if_name, data, TIPC_MAX_IF_NAME);
/* Update own tolerance if peer indicates a non-zero value */
if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL))
@@ -1448,6 +1471,8 @@
rc = TIPC_LINK_UP_EVT;
break;
}
+ tipc_mon_rcv(l->net, data, dlen, l->addr,
+ &l->mon_state, l->bearer_id);
/* Send NACK if peer has sent pkts we haven't received yet */
if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l))
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
new file mode 100644
index 0000000..87d4efe
--- /dev/null
+++ b/net/tipc/monitor.c
@@ -0,0 +1,651 @@
+/*
+ * net/tipc/monitor.c
+ *
+ * Copyright (c) 2016, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "addr.h"
+#include "monitor.h"
+
+#define MAX_MON_DOMAIN 64
+#define MON_TIMEOUT 120000
+#define MAX_PEER_DOWN_EVENTS 4
+
+/* struct tipc_mon_domain: domain record to be transferred between peers
+ * @len: actual size of domain record
+ * @gen: current generation of sender's domain
+ * @ack_gen: most recent generation of self's domain acked by peer
+ * @member_cnt: number of domain member nodes described in this record
+ * @up_map: bit map indicating which of the members the sender considers up
+ * @members: identity of the domain members
+ */
+struct tipc_mon_domain {
+ u16 len;
+ u16 gen;
+ u16 ack_gen;
+ u16 member_cnt;
+ u64 up_map;
+ u32 members[MAX_MON_DOMAIN];
+};
+
+/* struct tipc_peer: state of a peer node and its domain
+ * @addr: tipc node identity of peer
+ * @head_map: shows which other nodes currently consider peer 'up'
+ * @domain: most recent domain record from peer
+ * @hash: position in hashed lookup list
+ * @list: position in linked list, in circular ascending order by 'addr'
+ * @applied: number of reported domain members applied on this monitor list
+ * @is_up: peer is up as seen from this node
+ * @is_head: peer is assigned domain head as seen from this node
+ * @is_local: peer is in local domain and should be continuously monitored
+ * @down_cnt: - numbers of other peers which have reported this on lost
+ */
+struct tipc_peer {
+ u32 addr;
+ struct tipc_mon_domain *domain;
+ struct hlist_node hash;
+ struct list_head list;
+ u8 applied;
+ u8 down_cnt;
+ bool is_up;
+ bool is_head;
+ bool is_local;
+};
+
+struct tipc_monitor {
+ struct hlist_head peers[NODE_HTABLE_SIZE];
+ int peer_cnt;
+ struct tipc_peer *self;
+ rwlock_t lock;
+ struct tipc_mon_domain cache;
+ u16 list_gen;
+ u16 dom_gen;
+ struct net *net;
+ struct timer_list timer;
+ unsigned long timer_intv;
+};
+
+static struct tipc_monitor *tipc_monitor(struct net *net, int bearer_id)
+{
+ return tipc_net(net)->monitors[bearer_id];
+}
+
+const int tipc_max_domain_size = sizeof(struct tipc_mon_domain);
+
+/* dom_rec_len(): actual length of domain record for transport
+ */
+static int dom_rec_len(struct tipc_mon_domain *dom, u16 mcnt)
+{
+ return ((void *)&dom->members - (void *)dom) + (mcnt * sizeof(u32));
+}
+
+/* dom_size() : calculate size of own domain based on number of peers
+ */
+static int dom_size(int peers)
+{
+ int i = 0;
+
+ while ((i * i) < peers)
+ i++;
+ return i < MAX_MON_DOMAIN ? i : MAX_MON_DOMAIN;
+}
+
+static void map_set(u64 *up_map, int i, unsigned int v)
+{
+ *up_map &= ~(1 << i);
+ *up_map |= (v << i);
+}
+
+static int map_get(u64 up_map, int i)
+{
+ return (up_map & (1 << i)) >> i;
+}
+
+static struct tipc_peer *peer_prev(struct tipc_peer *peer)
+{
+ return list_last_entry(&peer->list, struct tipc_peer, list);
+}
+
+static struct tipc_peer *peer_nxt(struct tipc_peer *peer)
+{
+ return list_first_entry(&peer->list, struct tipc_peer, list);
+}
+
+static struct tipc_peer *peer_head(struct tipc_peer *peer)
+{
+ while (!peer->is_head)
+ peer = peer_prev(peer);
+ return peer;
+}
+
+static struct tipc_peer *get_peer(struct tipc_monitor *mon, u32 addr)
+{
+ struct tipc_peer *peer;
+ unsigned int thash = tipc_hashfn(addr);
+
+ hlist_for_each_entry(peer, &mon->peers[thash], hash) {
+ if (peer->addr == addr)
+ return peer;
+ }
+ return NULL;
+}
+
+static struct tipc_peer *get_self(struct net *net, int bearer_id)
+{
+ struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+
+ return mon->self;
+}
+
+static inline bool tipc_mon_is_active(struct net *net, struct tipc_monitor *mon)
+{
+ struct tipc_net *tn = tipc_net(net);
+
+ return mon->peer_cnt > tn->mon_threshold;
+}
+
+/* mon_identify_lost_members() : - identify amd mark potentially lost members
+ */
+static void mon_identify_lost_members(struct tipc_peer *peer,
+ struct tipc_mon_domain *dom_bef,
+ int applied_bef)
+{
+ struct tipc_peer *member = peer;
+ struct tipc_mon_domain *dom_aft = peer->domain;
+ int applied_aft = peer->applied;
+ int i;
+
+ for (i = 0; i < applied_bef; i++) {
+ member = peer_nxt(member);
+
+ /* Do nothing if self or peer already see member as down */
+ if (!member->is_up || !map_get(dom_bef->up_map, i))
+ continue;
+
+ /* Loss of local node must be detected by active probing */
+ if (member->is_local)
+ continue;
+
+ /* Start probing if member was removed from applied domain */
+ if (!applied_aft || (applied_aft < i)) {
+ member->down_cnt = 1;
+ continue;
+ }
+
+ /* Member loss is confirmed if it is still in applied domain */
+ if (!map_get(dom_aft->up_map, i))
+ member->down_cnt++;
+ }
+}
+
+/* mon_apply_domain() : match a peer's domain record against monitor list
+ */
+static void mon_apply_domain(struct tipc_monitor *mon,
+ struct tipc_peer *peer)
+{
+ struct tipc_mon_domain *dom = peer->domain;
+ struct tipc_peer *member;
+ u32 addr;
+ int i;
+
+ if (!dom || !peer->is_up)
+ return;
+
+ /* Scan across domain members and match against monitor list */
+ peer->applied = 0;
+ member = peer_nxt(peer);
+ for (i = 0; i < dom->member_cnt; i++) {
+ addr = dom->members[i];
+ if (addr != member->addr)
+ return;
+ peer->applied++;
+ member = peer_nxt(member);
+ }
+}
+
+/* mon_update_local_domain() : update after peer addition/removal/up/down
+ */
+static void mon_update_local_domain(struct tipc_monitor *mon)
+{
+ struct tipc_peer *self = mon->self;
+ struct tipc_mon_domain *cache = &mon->cache;
+ struct tipc_mon_domain *dom = self->domain;
+ struct tipc_peer *peer = self;
+ u64 prev_up_map = dom->up_map;
+ u16 member_cnt, i;
+ bool diff;
+
+ /* Update local domain size based on current size of cluster */
+ member_cnt = dom_size(mon->peer_cnt) - 1;
+ self->applied = member_cnt;
+
+ /* Update native and cached outgoing local domain records */
+ dom->len = dom_rec_len(dom, member_cnt);
+ diff = dom->member_cnt != member_cnt;
+ dom->member_cnt = member_cnt;
+ for (i = 0; i < member_cnt; i++) {
+ peer = peer_nxt(peer);
+ diff |= dom->members[i] != peer->addr;
+ dom->members[i] = peer->addr;
+ map_set(&dom->up_map, i, peer->is_up);
+ cache->members[i] = htonl(peer->addr);
+ }
+ diff |= dom->up_map != prev_up_map;
+ if (!diff)
+ return;
+ dom->gen = ++mon->dom_gen;
+ cache->len = htons(dom->len);
+ cache->gen = htons(dom->gen);
+ cache->member_cnt = htons(member_cnt);
+ cache->up_map = cpu_to_be64(dom->up_map);
+ mon_apply_domain(mon, self);
+}
+
+/* mon_update_neighbors() : update preceding neighbors of added/removed peer
+ */
+static void mon_update_neighbors(struct tipc_monitor *mon,
+ struct tipc_peer *peer)
+{
+ int dz, i;
+
+ dz = dom_size(mon->peer_cnt);
+ for (i = 0; i < dz; i++) {
+ mon_apply_domain(mon, peer);
+ peer = peer_prev(peer);
+ }
+}
+
+/* mon_assign_roles() : reassign peer roles after a network change
+ * The monitor list is consistent at this stage; i.e., each peer is monitoring
+ * a set of domain members as matched between domain record and the monitor list
+ */
+static void mon_assign_roles(struct tipc_monitor *mon, struct tipc_peer *head)
+{
+ struct tipc_peer *peer = peer_nxt(head);
+ struct tipc_peer *self = mon->self;
+ int i = 0;
+
+ for (; peer != self; peer = peer_nxt(peer)) {
+ peer->is_local = false;
+
+ /* Update domain member */
+ if (i++ < head->applied) {
+ peer->is_head = false;
+ if (head == self)
+ peer->is_local = true;
+ continue;
+ }
+ /* Assign next domain head */
+ if (!peer->is_up)
+ continue;
+ if (peer->is_head)
+ break;
+ head = peer;
+ head->is_head = true;
+ i = 0;
+ }
+ mon->list_gen++;
+}
+
+void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id)
+{
+ struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+ struct tipc_peer *self = get_self(net, bearer_id);
+ struct tipc_peer *peer, *prev, *head;
+
+ write_lock_bh(&mon->lock);
+ peer = get_peer(mon, addr);
+ if (!peer)
+ goto exit;
+ prev = peer_prev(peer);
+ list_del(&peer->list);
+ hlist_del(&peer->hash);
+ kfree(peer->domain);
+ kfree(peer);
+ mon->peer_cnt--;
+ head = peer_head(prev);
+ if (head == self)
+ mon_update_local_domain(mon);
+ mon_update_neighbors(mon, prev);
+
+ /* Revert to full-mesh monitoring if we reach threshold */
+ if (!tipc_mon_is_active(net, mon)) {
+ list_for_each_entry(peer, &self->list, list) {
+ kfree(peer->domain);
+ peer->domain = NULL;
+ peer->applied = 0;
+ }
+ }
+ mon_assign_roles(mon, head);
+exit:
+ write_unlock_bh(&mon->lock);
+}
+
+static bool tipc_mon_add_peer(struct tipc_monitor *mon, u32 addr,
+ struct tipc_peer **peer)
+{
+ struct tipc_peer *self = mon->self;
+ struct tipc_peer *cur, *prev, *p;
+
+ p = kzalloc(sizeof(*p), GFP_ATOMIC);
+ *peer = p;
+ if (!p)
+ return false;
+ p->addr = addr;
+
+ /* Add new peer to lookup list */
+ INIT_LIST_HEAD(&p->list);
+ hlist_add_head(&p->hash, &mon->peers[tipc_hashfn(addr)]);
+
+ /* Sort new peer into iterator list, in ascending circular order */
+ prev = self;
+ list_for_each_entry(cur, &self->list, list) {
+ if ((addr > prev->addr) && (addr < cur->addr))
+ break;
+ if (((addr < cur->addr) || (addr > prev->addr)) &&
+ (prev->addr > cur->addr))
+ break;
+ prev = cur;
+ }
+ list_add_tail(&p->list, &cur->list);
+ mon->peer_cnt++;
+ mon_update_neighbors(mon, p);
+ return true;
+}
+
+void tipc_mon_peer_up(struct net *net, u32 addr, int bearer_id)
+{
+ struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+ struct tipc_peer *self = get_self(net, bearer_id);
+ struct tipc_peer *peer, *head;
+
+ write_lock_bh(&mon->lock);
+ peer = get_peer(mon, addr);
+ if (!peer && !tipc_mon_add_peer(mon, addr, &peer))
+ goto exit;
+ peer->is_up = true;
+ head = peer_head(peer);
+ if (head == self)
+ mon_update_local_domain(mon);
+ mon_assign_roles(mon, head);
+exit:
+ write_unlock_bh(&mon->lock);
+}
+
+void tipc_mon_peer_down(struct net *net, u32 addr, int bearer_id)
+{
+ struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+ struct tipc_peer *self = get_self(net, bearer_id);
+ struct tipc_peer *peer, *head;
+ struct tipc_mon_domain *dom;
+ int applied;
+
+ write_lock_bh(&mon->lock);
+ peer = get_peer(mon, addr);
+ if (!peer) {
+ pr_warn("Mon: unknown link %x/%u DOWN\n", addr, bearer_id);
+ goto exit;
+ }
+ applied = peer->applied;
+ peer->applied = 0;
+ dom = peer->domain;
+ peer->domain = NULL;
+ if (peer->is_head)
+ mon_identify_lost_members(peer, dom, applied);
+ kfree(dom);
+ peer->is_up = false;
+ peer->is_head = false;
+ peer->is_local = false;
+ peer->down_cnt = 0;
+ head = peer_head(peer);
+ if (head == self)
+ mon_update_local_domain(mon);
+ mon_assign_roles(mon, head);
+exit:
+ write_unlock_bh(&mon->lock);
+}
+
+/* tipc_mon_rcv - process monitor domain event message
+ */
+void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr,
+ struct tipc_mon_state *state, int bearer_id)
+{
+ struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+ struct tipc_mon_domain *arrv_dom = data;
+ struct tipc_mon_domain dom_bef;
+ struct tipc_mon_domain *dom;
+ struct tipc_peer *peer;
+ u16 new_member_cnt = ntohs(arrv_dom->member_cnt);
+ int new_dlen = dom_rec_len(arrv_dom, new_member_cnt);
+ u16 new_gen = ntohs(arrv_dom->gen);
+ u16 acked_gen = ntohs(arrv_dom->ack_gen);
+ bool probing = state->probing;
+ int i, applied_bef;
+
+ state->probing = false;
+ if (!dlen)
+ return;
+
+ /* Sanity check received domain record */
+ if ((dlen < new_dlen) || ntohs(arrv_dom->len) != new_dlen) {
+ pr_warn_ratelimited("Received illegal domain record\n");
+ return;
+ }
+
+ /* Synch generation numbers with peer if link just came up */
+ if (!state->synched) {
+ state->peer_gen = new_gen - 1;
+ state->acked_gen = acked_gen;
+ state->synched = true;
+ }
+
+ if (more(acked_gen, state->acked_gen))
+ state->acked_gen = acked_gen;
+
+ /* Drop duplicate unless we are waiting for a probe response */
+ if (!more(new_gen, state->peer_gen) && !probing)
+ return;
+
+ write_lock_bh(&mon->lock);
+ peer = get_peer(mon, addr);
+ if (!peer || !peer->is_up)
+ goto exit;
+
+ /* Peer is confirmed, stop any ongoing probing */
+ peer->down_cnt = 0;
+
+ /* Task is done for duplicate record */
+ if (!more(new_gen, state->peer_gen))
+ goto exit;
+
+ state->peer_gen = new_gen;
+
+ /* Cache current domain record for later use */
+ dom_bef.member_cnt = 0;
+ dom = peer->domain;
+ if (dom)
+ memcpy(&dom_bef, dom, dom->len);
+
+ /* Transform and store received domain record */
+ if (!dom || (dom->len < new_dlen)) {
+ kfree(dom);
+ dom = kmalloc(new_dlen, GFP_ATOMIC);
+ peer->domain = dom;
+ if (!dom)
+ goto exit;
+ }
+ dom->len = new_dlen;
+ dom->gen = new_gen;
+ dom->member_cnt = new_member_cnt;
+ dom->up_map = be64_to_cpu(arrv_dom->up_map);
+ for (i = 0; i < new_member_cnt; i++)
+ dom->members[i] = ntohl(arrv_dom->members[i]);
+
+ /* Update peers affected by this domain record */
+ applied_bef = peer->applied;
+ mon_apply_domain(mon, peer);
+ mon_identify_lost_members(peer, &dom_bef, applied_bef);
+ mon_assign_roles(mon, peer_head(peer));
+exit:
+ write_unlock_bh(&mon->lock);
+}
+
+void tipc_mon_prep(struct net *net, void *data, int *dlen,
+ struct tipc_mon_state *state, int bearer_id)
+{
+ struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+ struct tipc_mon_domain *dom = data;
+ u16 gen = mon->dom_gen;
+ u16 len;
+
+ if (!tipc_mon_is_active(net, mon))
+ return;
+
+ /* Send only a dummy record with ack if peer has acked our last sent */
+ if (likely(state->acked_gen == gen)) {
+ len = dom_rec_len(dom, 0);
+ *dlen = len;
+ dom->len = htons(len);
+ dom->gen = htons(gen);
+ dom->ack_gen = htons(state->peer_gen);
+ dom->member_cnt = 0;
+ return;
+ }
+ /* Send the full record */
+ read_lock_bh(&mon->lock);
+ len = ntohs(mon->cache.len);
+ *dlen = len;
+ memcpy(data, &mon->cache, len);
+ read_unlock_bh(&mon->lock);
+ dom->ack_gen = htons(state->peer_gen);
+}
+
+void tipc_mon_get_state(struct net *net, u32 addr,
+ struct tipc_mon_state *state,
+ int bearer_id)
+{
+ struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+ struct tipc_peer *peer;
+
+ /* Used cached state if table has not changed */
+ if (!state->probing &&
+ (state->list_gen == mon->list_gen) &&
+ (state->acked_gen == mon->dom_gen))
+ return;
+
+ read_lock_bh(&mon->lock);
+ peer = get_peer(mon, addr);
+ if (peer) {
+ state->probing = state->acked_gen != mon->dom_gen;
+ state->probing |= peer->down_cnt;
+ state->reset |= peer->down_cnt >= MAX_PEER_DOWN_EVENTS;
+ state->monitoring = peer->is_local;
+ state->monitoring |= peer->is_head;
+ state->list_gen = mon->list_gen;
+ }
+ read_unlock_bh(&mon->lock);
+}
+
+static void mon_timeout(unsigned long m)
+{
+ struct tipc_monitor *mon = (void *)m;
+ struct tipc_peer *self;
+ int best_member_cnt = dom_size(mon->peer_cnt) - 1;
+
+ write_lock_bh(&mon->lock);
+ self = mon->self;
+ if (self && (best_member_cnt != self->applied)) {
+ mon_update_local_domain(mon);
+ mon_assign_roles(mon, self);
+ }
+ write_unlock_bh(&mon->lock);
+ mod_timer(&mon->timer, jiffies + mon->timer_intv);
+}
+
+int tipc_mon_create(struct net *net, int bearer_id)
+{
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_monitor *mon;
+ struct tipc_peer *self;
+ struct tipc_mon_domain *dom;
+
+ if (tn->monitors[bearer_id])
+ return 0;
+
+ mon = kzalloc(sizeof(*mon), GFP_ATOMIC);
+ self = kzalloc(sizeof(*self), GFP_ATOMIC);
+ dom = kzalloc(sizeof(*dom), GFP_ATOMIC);
+ if (!mon || !self || !dom) {
+ kfree(mon);
+ kfree(self);
+ kfree(dom);
+ return -ENOMEM;
+ }
+ tn->monitors[bearer_id] = mon;
+ rwlock_init(&mon->lock);
+ mon->net = net;
+ mon->peer_cnt = 1;
+ mon->self = self;
+ self->domain = dom;
+ self->addr = tipc_own_addr(net);
+ self->is_up = true;
+ self->is_head = true;
+ INIT_LIST_HEAD(&self->list);
+ setup_timer(&mon->timer, mon_timeout, (unsigned long)mon);
+ mon->timer_intv = msecs_to_jiffies(MON_TIMEOUT + (tn->random & 0xffff));
+ mod_timer(&mon->timer, jiffies + mon->timer_intv);
+ return 0;
+}
+
+void tipc_mon_delete(struct net *net, int bearer_id)
+{
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+ struct tipc_peer *self = get_self(net, bearer_id);
+ struct tipc_peer *peer, *tmp;
+
+ write_lock_bh(&mon->lock);
+ tn->monitors[bearer_id] = NULL;
+ list_for_each_entry_safe(peer, tmp, &self->list, list) {
+ list_del(&peer->list);
+ hlist_del(&peer->hash);
+ kfree(peer->domain);
+ kfree(peer);
+ }
+ mon->self = NULL;
+ write_unlock_bh(&mon->lock);
+ del_timer_sync(&mon->timer);
+ kfree(self->domain);
+ kfree(self);
+ kfree(mon);
+}
diff --git a/net/tipc/monitor.h b/net/tipc/monitor.h
new file mode 100644
index 0000000..598459c
--- /dev/null
+++ b/net/tipc/monitor.h
@@ -0,0 +1,73 @@
+/*
+ * net/tipc/monitor.h
+ *
+ * Copyright (c) 2015, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_MONITOR_H
+#define _TIPC_MONITOR_H
+
+/* struct tipc_mon_state: link instance's cache of monitor list and domain state
+ * @list_gen: current generation of this node's monitor list
+ * @gen: current generation of this node's local domain
+ * @peer_gen: most recent domain generation received from peer
+ * @acked_gen: most recent generation of self's domain acked by peer
+ * @monitoring: this peer endpoint should continuously monitored
+ * @probing: peer endpoint should be temporarily probed for potential loss
+ * @synched: domain record's generation has been synched with peer after reset
+ */
+struct tipc_mon_state {
+ u16 list_gen;
+ u16 peer_gen;
+ u16 acked_gen;
+ bool monitoring :1;
+ bool probing :1;
+ bool reset :1;
+ bool synched :1;
+};
+
+int tipc_mon_create(struct net *net, int bearer_id);
+void tipc_mon_delete(struct net *net, int bearer_id);
+
+void tipc_mon_peer_up(struct net *net, u32 addr, int bearer_id);
+void tipc_mon_peer_down(struct net *net, u32 addr, int bearer_id);
+void tipc_mon_prep(struct net *net, void *data, int *dlen,
+ struct tipc_mon_state *state, int bearer_id);
+void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr,
+ struct tipc_mon_state *state, int bearer_id);
+void tipc_mon_get_state(struct net *net, u32 addr,
+ struct tipc_mon_state *state,
+ int bearer_id);
+void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id);
+
+extern const int tipc_max_domain_size;
+#endif
diff --git a/net/tipc/node.c b/net/tipc/node.c
index d6a490f..a3fc0a3 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -40,6 +40,7 @@
#include "name_distr.h"
#include "socket.h"
#include "bcast.h"
+#include "monitor.h"
#include "discover.h"
#include "netlink.h"
@@ -205,17 +206,6 @@
return caps;
}
-/*
- * A trivial power-of-two bitmask technique is used for speed, since this
- * operation is done for every incoming TIPC packet. The number of hash table
- * entries has been chosen so that no hash chain exceeds 8 nodes and will
- * usually be much smaller (typically only a single node).
- */
-static unsigned int tipc_hashfn(u32 addr)
-{
- return addr & (NODE_HTABLE_SIZE - 1);
-}
-
static void tipc_node_kref_release(struct kref *kref)
{
struct tipc_node *n = container_of(kref, struct tipc_node, kref);
@@ -279,6 +269,7 @@
u32 addr = 0;
u32 flags = n->action_flags;
u32 link_id = 0;
+ u32 bearer_id;
struct list_head *publ_list;
if (likely(!flags)) {
@@ -288,6 +279,7 @@
addr = n->addr;
link_id = n->link_id;
+ bearer_id = link_id & 0xffff;
publ_list = &n->publ_list;
n->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP |
@@ -301,13 +293,16 @@
if (flags & TIPC_NOTIFY_NODE_UP)
tipc_named_node_up(net, addr);
- if (flags & TIPC_NOTIFY_LINK_UP)
+ if (flags & TIPC_NOTIFY_LINK_UP) {
+ tipc_mon_peer_up(net, addr, bearer_id);
tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr,
TIPC_NODE_SCOPE, link_id, addr);
-
- if (flags & TIPC_NOTIFY_LINK_DOWN)
+ }
+ if (flags & TIPC_NOTIFY_LINK_DOWN) {
+ tipc_mon_peer_down(net, addr, bearer_id);
tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr,
link_id, addr);
+ }
}
struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
@@ -691,6 +686,7 @@
struct tipc_link *l = le->link;
struct tipc_media_addr *maddr;
struct sk_buff_head xmitq;
+ int old_bearer_id = bearer_id;
if (!l)
return;
@@ -710,6 +706,8 @@
tipc_link_fsm_evt(l, LINK_RESET_EVT);
}
tipc_node_write_unlock(n);
+ if (delete)
+ tipc_mon_remove_peer(n->net, n->addr, old_bearer_id);
tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr);
tipc_sk_rcv(n->net, &le->inputq);
}
diff --git a/tools/virtio/ringtest/Makefile b/tools/virtio/ringtest/Makefile
index 6ba7455..50e086c 100644
--- a/tools/virtio/ringtest/Makefile
+++ b/tools/virtio/ringtest/Makefile
@@ -1,6 +1,6 @@
all:
-all: ring virtio_ring_0_9 virtio_ring_poll virtio_ring_inorder
+all: ring virtio_ring_0_9 virtio_ring_poll virtio_ring_inorder ptr_ring
CFLAGS += -Wall
CFLAGS += -pthread -O2 -ggdb
@@ -8,6 +8,7 @@
main.o: main.c main.h
ring.o: ring.c main.h
+ptr_ring.o: ptr_ring.c main.h ../../../include/linux/ptr_ring.h
virtio_ring_0_9.o: virtio_ring_0_9.c main.h
virtio_ring_poll.o: virtio_ring_poll.c virtio_ring_0_9.c main.h
virtio_ring_inorder.o: virtio_ring_inorder.c virtio_ring_0_9.c main.h
@@ -15,11 +16,13 @@
virtio_ring_0_9: virtio_ring_0_9.o main.o
virtio_ring_poll: virtio_ring_poll.o main.o
virtio_ring_inorder: virtio_ring_inorder.o main.o
+ptr_ring: ptr_ring.o main.o
clean:
-rm main.o
-rm ring.o ring
-rm virtio_ring_0_9.o virtio_ring_0_9
-rm virtio_ring_poll.o virtio_ring_poll
-rm virtio_ring_inorder.o virtio_ring_inorder
+ -rm ptr_ring.o ptr_ring
.PHONY: all clean
diff --git a/tools/virtio/ringtest/ptr_ring.c b/tools/virtio/ringtest/ptr_ring.c
new file mode 100644
index 0000000..74abd74
--- /dev/null
+++ b/tools/virtio/ringtest/ptr_ring.c
@@ -0,0 +1,192 @@
+#define _GNU_SOURCE
+#include "main.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <pthread.h>
+#include <malloc.h>
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+
+#define SMP_CACHE_BYTES 64
+#define cache_line_size() SMP_CACHE_BYTES
+#define ____cacheline_aligned_in_smp __attribute__ ((aligned (SMP_CACHE_BYTES)))
+#define unlikely(x) (__builtin_expect(!!(x), 0))
+#define ALIGN(x, a) (((x) + (a) - 1) / (a) * (a))
+typedef pthread_spinlock_t spinlock_t;
+
+typedef int gfp_t;
+static void *kzalloc(unsigned size, gfp_t gfp)
+{
+ void *p = memalign(64, size);
+ if (!p)
+ return p;
+ memset(p, 0, size);
+
+ return p;
+}
+
+static void kfree(void *p)
+{
+ if (p)
+ free(p);
+}
+
+static void spin_lock_init(spinlock_t *lock)
+{
+ int r = pthread_spin_init(lock, 0);
+ assert(!r);
+}
+
+static void spin_lock(spinlock_t *lock)
+{
+ int ret = pthread_spin_lock(lock);
+ assert(!ret);
+}
+
+static void spin_unlock(spinlock_t *lock)
+{
+ int ret = pthread_spin_unlock(lock);
+ assert(!ret);
+}
+
+static void spin_lock_bh(spinlock_t *lock)
+{
+ spin_lock(lock);
+}
+
+static void spin_unlock_bh(spinlock_t *lock)
+{
+ spin_unlock(lock);
+}
+
+static void spin_lock_irq(spinlock_t *lock)
+{
+ spin_lock(lock);
+}
+
+static void spin_unlock_irq(spinlock_t *lock)
+{
+ spin_unlock(lock);
+}
+
+static void spin_lock_irqsave(spinlock_t *lock, unsigned long f)
+{
+ spin_lock(lock);
+}
+
+static void spin_unlock_irqrestore(spinlock_t *lock, unsigned long f)
+{
+ spin_unlock(lock);
+}
+
+#include "../../../include/linux/ptr_ring.h"
+
+static unsigned long long headcnt, tailcnt;
+static struct ptr_ring array ____cacheline_aligned_in_smp;
+
+/* implemented by ring */
+void alloc_ring(void)
+{
+ int ret = ptr_ring_init(&array, ring_size, 0);
+ assert(!ret);
+}
+
+/* guest side */
+int add_inbuf(unsigned len, void *buf, void *datap)
+{
+ int ret;
+
+ ret = __ptr_ring_produce(&array, buf);
+ if (ret >= 0) {
+ ret = 0;
+ headcnt++;
+ }
+
+ return ret;
+}
+
+/*
+ * ptr_ring API provides no way for producer to find out whether a given
+ * buffer was consumed. Our tests merely require that a successful get_buf
+ * implies that add_inbuf succeed in the past, and that add_inbuf will succeed,
+ * fake it accordingly.
+ */
+void *get_buf(unsigned *lenp, void **bufp)
+{
+ void *datap;
+
+ if (tailcnt == headcnt || __ptr_ring_full(&array))
+ datap = NULL;
+ else {
+ datap = "Buffer\n";
+ ++tailcnt;
+ }
+
+ return datap;
+}
+
+void poll_used(void)
+{
+ void *b;
+
+ do {
+ if (tailcnt == headcnt || __ptr_ring_full(&array)) {
+ b = NULL;
+ barrier();
+ } else {
+ b = "Buffer\n";
+ }
+ } while (!b);
+}
+
+void disable_call()
+{
+ assert(0);
+}
+
+bool enable_call()
+{
+ assert(0);
+}
+
+void kick_available(void)
+{
+ assert(0);
+}
+
+/* host side */
+void disable_kick()
+{
+ assert(0);
+}
+
+bool enable_kick()
+{
+ assert(0);
+}
+
+void poll_avail(void)
+{
+ void *b;
+
+ do {
+ barrier();
+ b = __ptr_ring_peek(&array);
+ } while (!b);
+}
+
+bool use_buf(unsigned *lenp, void **bufp)
+{
+ void *ptr;
+
+ ptr = __ptr_ring_consume(&array);
+
+ return ptr;
+}
+
+void call_used(void)
+{
+ assert(0);
+}