Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index b959659..ccb6048 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -603,3 +603,10 @@
Who: Jean Delvare <khali@linux-fr.org>
----------------------------
+
+What: xt_connlimit rev 0
+When: 2012
+Who: Jan Engelhardt <jengelh@medozas.de>
+Files: net/netfilter/xt_connlimit.c
+
+----------------------------
diff --git a/drivers/net/atl1c/atl1c_hw.c b/drivers/net/atl1c/atl1c_hw.c
index 1bf6720..23f2ab0 100644
--- a/drivers/net/atl1c/atl1c_hw.c
+++ b/drivers/net/atl1c/atl1c_hw.c
@@ -345,7 +345,7 @@
*/
static int atl1c_phy_setup_adv(struct atl1c_hw *hw)
{
- u16 mii_adv_data = ADVERTISE_DEFAULT_CAP & ~ADVERTISE_SPEED_MASK;
+ u16 mii_adv_data = ADVERTISE_DEFAULT_CAP & ~ADVERTISE_ALL;
u16 mii_giga_ctrl_data = GIGA_CR_1000T_DEFAULT_CAP &
~GIGA_CR_1000T_SPEED_MASK;
@@ -373,7 +373,7 @@
}
if (atl1c_write_phy_reg(hw, MII_ADVERTISE, mii_adv_data) != 0 ||
- atl1c_write_phy_reg(hw, MII_GIGA_CR, mii_giga_ctrl_data) != 0)
+ atl1c_write_phy_reg(hw, MII_CTRL1000, mii_giga_ctrl_data) != 0)
return -1;
return 0;
}
@@ -517,19 +517,18 @@
"Error Setting up Auto-Negotiation\n");
return ret_val;
}
- mii_bmcr_data |= BMCR_AUTO_NEG_EN | BMCR_RESTART_AUTO_NEG;
+ mii_bmcr_data |= BMCR_ANENABLE | BMCR_ANRESTART;
break;
case MEDIA_TYPE_100M_FULL:
- mii_bmcr_data |= BMCR_SPEED_100 | BMCR_FULL_DUPLEX;
+ mii_bmcr_data |= BMCR_SPEED100 | BMCR_FULLDPLX;
break;
case MEDIA_TYPE_100M_HALF:
- mii_bmcr_data |= BMCR_SPEED_100;
+ mii_bmcr_data |= BMCR_SPEED100;
break;
case MEDIA_TYPE_10M_FULL:
- mii_bmcr_data |= BMCR_SPEED_10 | BMCR_FULL_DUPLEX;
+ mii_bmcr_data |= BMCR_FULLDPLX;
break;
case MEDIA_TYPE_10M_HALF:
- mii_bmcr_data |= BMCR_SPEED_10;
break;
default:
if (netif_msg_link(adapter))
@@ -657,7 +656,7 @@
err = atl1c_phy_setup_adv(hw);
if (err)
return err;
- mii_bmcr_data |= BMCR_AUTO_NEG_EN | BMCR_RESTART_AUTO_NEG;
+ mii_bmcr_data |= BMCR_ANENABLE | BMCR_ANRESTART;
return atl1c_write_phy_reg(hw, MII_BMCR, mii_bmcr_data);
}
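
For context: linux/mii.h encodes 10 Mb/s as both BMCR speed-select bits clear, so it provides no BMCR_SPEED10 constant at all. That is why the MEDIA_TYPE_10M_FULL case above collapses to BMCR_FULLDPLX alone and MEDIA_TYPE_10M_HALF becomes an empty case. A minimal sketch (not part of the patch) of how the replaced private constants line up with the standard ones:

	/* Sketch only: the standard linux/mii.h values are bit-for-bit
	 * identical to the private atl1c definitions they replace.
	 * BUILD_BUG_ON() comes from linux/kernel.h in this kernel.
	 */
	#include <linux/kernel.h>
	#include <linux/mii.h>

	static inline void atl1c_mii_const_check(void)
	{
		BUILD_BUG_ON(BMCR_FULLDPLX  != 0x0100); /* was BMCR_FULL_DUPLEX */
		BUILD_BUG_ON(BMCR_ANRESTART != 0x0200); /* was BMCR_RESTART_AUTO_NEG */
		BUILD_BUG_ON(BMCR_ANENABLE  != 0x1000); /* was BMCR_AUTO_NEG_EN */
		BUILD_BUG_ON(BMCR_SPEED100  != 0x2000); /* was BMCR_SPEED_100 */
		/* 10 Mb/s: bits 6 and 13 both clear -- no constant exists */
	}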
diff --git a/drivers/net/atl1c/atl1c_hw.h b/drivers/net/atl1c/atl1c_hw.h
index 3dd6759..655fc6c 100644
--- a/drivers/net/atl1c/atl1c_hw.h
+++ b/drivers/net/atl1c/atl1c_hw.h
@@ -736,55 +736,16 @@
#define REG_DEBUG_DATA0 0x1900
#define REG_DEBUG_DATA1 0x1904
-/* PHY Control Register */
-#define MII_BMCR 0x00
-#define BMCR_SPEED_SELECT_MSB 0x0040 /* bits 6,13: 10=1000, 01=100, 00=10 */
-#define BMCR_COLL_TEST_ENABLE 0x0080 /* Collision test enable */
-#define BMCR_FULL_DUPLEX 0x0100 /* FDX =1, half duplex =0 */
-#define BMCR_RESTART_AUTO_NEG 0x0200 /* Restart auto negotiation */
-#define BMCR_ISOLATE 0x0400 /* Isolate PHY from MII */
-#define BMCR_POWER_DOWN 0x0800 /* Power down */
-#define BMCR_AUTO_NEG_EN 0x1000 /* Auto Neg Enable */
-#define BMCR_SPEED_SELECT_LSB 0x2000 /* bits 6,13: 10=1000, 01=100, 00=10 */
-#define BMCR_LOOPBACK 0x4000 /* 0 = normal, 1 = loopback */
-#define BMCR_RESET 0x8000 /* 0 = normal, 1 = PHY reset */
-#define BMCR_SPEED_MASK 0x2040
-#define BMCR_SPEED_1000 0x0040
-#define BMCR_SPEED_100 0x2000
-#define BMCR_SPEED_10 0x0000
-
-/* PHY Status Register */
-#define MII_BMSR 0x01
-#define BMMSR_EXTENDED_CAPS 0x0001 /* Extended register capabilities */
-#define BMSR_JABBER_DETECT 0x0002 /* Jabber Detected */
-#define BMSR_LINK_STATUS 0x0004 /* Link Status 1 = link */
-#define BMSR_AUTONEG_CAPS 0x0008 /* Auto Neg Capable */
-#define BMSR_REMOTE_FAULT 0x0010 /* Remote Fault Detect */
-#define BMSR_AUTONEG_COMPLETE 0x0020 /* Auto Neg Complete */
-#define BMSR_PREAMBLE_SUPPRESS 0x0040 /* Preamble may be suppressed */
-#define BMSR_EXTENDED_STATUS 0x0100 /* Ext. status info in Reg 0x0F */
-#define BMSR_100T2_HD_CAPS 0x0200 /* 100T2 Half Duplex Capable */
-#define BMSR_100T2_FD_CAPS 0x0400 /* 100T2 Full Duplex Capable */
-#define BMSR_10T_HD_CAPS 0x0800 /* 10T Half Duplex Capable */
-#define BMSR_10T_FD_CAPS 0x1000 /* 10T Full Duplex Capable */
-#define BMSR_100X_HD_CAPS 0x2000 /* 100X Half Duplex Capable */
-#define BMMII_SR_100X_FD_CAPS 0x4000 /* 100X Full Duplex Capable */
-#define BMMII_SR_100T4_CAPS 0x8000 /* 100T4 Capable */
-
-#define MII_PHYSID1 0x02
-#define MII_PHYSID2 0x03
#define L1D_MPW_PHYID1 0xD01C /* V7 */
#define L1D_MPW_PHYID2 0xD01D /* V1-V6 */
#define L1D_MPW_PHYID3 0xD01E /* V8 */
/* Autoneg Advertisement Register */
-#define MII_ADVERTISE 0x04
-#define ADVERTISE_SPEED_MASK 0x01E0
-#define ADVERTISE_DEFAULT_CAP 0x0DE0
+#define ADVERTISE_DEFAULT_CAP \
+ (ADVERTISE_ALL | ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM)
/* 1000BASE-T Control Register */
-#define MII_GIGA_CR 0x09
#define GIGA_CR_1000T_REPEATER_DTE 0x0400 /* 1=Repeater/switch device port 0=DTE device */
#define GIGA_CR_1000T_MS_VALUE 0x0800 /* 1=Configure PHY as Master 0=Configure PHY as Slave */
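
Worth noting: the rewritten ADVERTISE_DEFAULT_CAP is numerically identical to the old 0x0DE0 literal, using the linux/mii.h values ADVERTISE_ALL = 0x01E0 (10HALF | 10FULL | 100HALF | 100FULL), ADVERTISE_PAUSE_CAP = 0x0400 and ADVERTISE_PAUSE_ASYM = 0x0800. A one-line compile-time check, placed in any compiled function (illustrative, not in the patch):

	BUILD_BUG_ON(ADVERTISE_DEFAULT_CAP != 0x0DE0);	/* matches the old literal */

The same holds for the 1000BASE-T masks in the atl1e header further down: ADVERTISE_1000FULL | ADVERTISE_1000HALF = 0x0200 | 0x0100 = 0x0300, the old MII_AT001_CR_1000T_SPEED_MASK value.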
diff --git a/drivers/net/atl1e/atl1e_ethtool.c b/drivers/net/atl1e/atl1e_ethtool.c
index 6943a6c..1209297 100644
--- a/drivers/net/atl1e/atl1e_ethtool.c
+++ b/drivers/net/atl1e/atl1e_ethtool.c
@@ -95,18 +95,18 @@
ecmd->advertising = hw->autoneg_advertised |
ADVERTISED_TP | ADVERTISED_Autoneg;
- adv4 = hw->mii_autoneg_adv_reg & ~MII_AR_SPEED_MASK;
+ adv4 = hw->mii_autoneg_adv_reg & ~ADVERTISE_ALL;
adv9 = hw->mii_1000t_ctrl_reg & ~MII_AT001_CR_1000T_SPEED_MASK;
if (hw->autoneg_advertised & ADVERTISE_10_HALF)
- adv4 |= MII_AR_10T_HD_CAPS;
+ adv4 |= ADVERTISE_10HALF;
if (hw->autoneg_advertised & ADVERTISE_10_FULL)
- adv4 |= MII_AR_10T_FD_CAPS;
+ adv4 |= ADVERTISE_10FULL;
if (hw->autoneg_advertised & ADVERTISE_100_HALF)
- adv4 |= MII_AR_100TX_HD_CAPS;
+ adv4 |= ADVERTISE_100HALF;
if (hw->autoneg_advertised & ADVERTISE_100_FULL)
- adv4 |= MII_AR_100TX_FD_CAPS;
+ adv4 |= ADVERTISE_100FULL;
if (hw->autoneg_advertised & ADVERTISE_1000_FULL)
- adv9 |= MII_AT001_CR_1000T_FD_CAPS;
+ adv9 |= ADVERTISE_1000FULL;
if (adv4 != hw->mii_autoneg_adv_reg ||
adv9 != hw->mii_1000t_ctrl_reg) {
diff --git a/drivers/net/atl1e/atl1e_hw.c b/drivers/net/atl1e/atl1e_hw.c
index 76cc043..923063d 100644
--- a/drivers/net/atl1e/atl1e_hw.c
+++ b/drivers/net/atl1e/atl1e_hw.c
@@ -318,7 +318,7 @@
* Advertisement Register (Address 4) and the 1000 mb speed bits in
* the 1000Base-T control Register (Address 9).
*/
- mii_autoneg_adv_reg &= ~MII_AR_SPEED_MASK;
+ mii_autoneg_adv_reg &= ~ADVERTISE_ALL;
mii_1000t_ctrl_reg &= ~MII_AT001_CR_1000T_SPEED_MASK;
/*
@@ -327,44 +327,37 @@
*/
switch (hw->media_type) {
case MEDIA_TYPE_AUTO_SENSOR:
- mii_autoneg_adv_reg |= (MII_AR_10T_HD_CAPS |
- MII_AR_10T_FD_CAPS |
- MII_AR_100TX_HD_CAPS |
- MII_AR_100TX_FD_CAPS);
- hw->autoneg_advertised = ADVERTISE_10_HALF |
- ADVERTISE_10_FULL |
- ADVERTISE_100_HALF |
- ADVERTISE_100_FULL;
+ mii_autoneg_adv_reg |= ADVERTISE_ALL;
+		hw->autoneg_advertised = ADVERTISE_10_HALF | ADVERTISE_10_FULL |
+					 ADVERTISE_100_HALF | ADVERTISE_100_FULL;
if (hw->nic_type == athr_l1e) {
- mii_1000t_ctrl_reg |=
- MII_AT001_CR_1000T_FD_CAPS;
+ mii_1000t_ctrl_reg |= ADVERTISE_1000FULL;
hw->autoneg_advertised |= ADVERTISE_1000_FULL;
}
break;
case MEDIA_TYPE_100M_FULL:
- mii_autoneg_adv_reg |= MII_AR_100TX_FD_CAPS;
+ mii_autoneg_adv_reg |= ADVERTISE_100FULL;
hw->autoneg_advertised = ADVERTISE_100_FULL;
break;
case MEDIA_TYPE_100M_HALF:
- mii_autoneg_adv_reg |= MII_AR_100TX_HD_CAPS;
+		mii_autoneg_adv_reg |= ADVERTISE_100HALF;
hw->autoneg_advertised = ADVERTISE_100_HALF;
break;
case MEDIA_TYPE_10M_FULL:
- mii_autoneg_adv_reg |= MII_AR_10T_FD_CAPS;
+		mii_autoneg_adv_reg |= ADVERTISE_10FULL;
hw->autoneg_advertised = ADVERTISE_10_FULL;
break;
default:
- mii_autoneg_adv_reg |= MII_AR_10T_HD_CAPS;
+		mii_autoneg_adv_reg |= ADVERTISE_10HALF;
hw->autoneg_advertised = ADVERTISE_10_HALF;
break;
}
/* flow control fixed to enable all */
- mii_autoneg_adv_reg |= (MII_AR_ASM_DIR | MII_AR_PAUSE);
+ mii_autoneg_adv_reg |= (ADVERTISE_PAUSE_ASYM | ADVERTISE_PAUSE_CAP);
hw->mii_autoneg_adv_reg = mii_autoneg_adv_reg;
hw->mii_1000t_ctrl_reg = mii_1000t_ctrl_reg;
@@ -374,7 +367,7 @@
return ret_val;
if (hw->nic_type == athr_l1e || hw->nic_type == athr_l2e_revA) {
- ret_val = atl1e_write_phy_reg(hw, MII_AT001_CR,
+ ret_val = atl1e_write_phy_reg(hw, MII_CTRL1000,
mii_1000t_ctrl_reg);
if (ret_val)
return ret_val;
@@ -397,7 +390,7 @@
int ret_val;
u16 phy_data;
- phy_data = MII_CR_RESET | MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG;
+ phy_data = BMCR_RESET | BMCR_ANENABLE | BMCR_ANRESTART;
ret_val = atl1e_write_phy_reg(hw, MII_BMCR, phy_data);
if (ret_val) {
@@ -645,15 +638,14 @@
return err;
if (hw->nic_type == athr_l1e || hw->nic_type == athr_l2e_revA) {
- err = atl1e_write_phy_reg(hw, MII_AT001_CR,
+ err = atl1e_write_phy_reg(hw, MII_CTRL1000,
hw->mii_1000t_ctrl_reg);
if (err)
return err;
}
err = atl1e_write_phy_reg(hw, MII_BMCR,
- MII_CR_RESET | MII_CR_AUTO_NEG_EN |
- MII_CR_RESTART_AUTO_NEG);
+ BMCR_RESET | BMCR_ANENABLE | BMCR_ANRESTART);
return err;
}
diff --git a/drivers/net/atl1e/atl1e_hw.h b/drivers/net/atl1e/atl1e_hw.h
index 5ea2f4d..74df16a 100644
--- a/drivers/net/atl1e/atl1e_hw.h
+++ b/drivers/net/atl1e/atl1e_hw.h
@@ -629,127 +629,24 @@
/***************************** MII definition ***************************************/
/* PHY Common Register */
-#define MII_BMCR 0x00
-#define MII_BMSR 0x01
-#define MII_PHYSID1 0x02
-#define MII_PHYSID2 0x03
-#define MII_ADVERTISE 0x04
-#define MII_LPA 0x05
-#define MII_EXPANSION 0x06
-#define MII_AT001_CR 0x09
-#define MII_AT001_SR 0x0A
-#define MII_AT001_ESR 0x0F
#define MII_AT001_PSCR 0x10
#define MII_AT001_PSSR 0x11
#define MII_INT_CTRL 0x12
#define MII_INT_STATUS 0x13
#define MII_SMARTSPEED 0x14
-#define MII_RERRCOUNTER 0x15
-#define MII_SREVISION 0x16
-#define MII_RESV1 0x17
#define MII_LBRERROR 0x18
-#define MII_PHYADDR 0x19
#define MII_RESV2 0x1a
-#define MII_TPISTATUS 0x1b
-#define MII_NCONFIG 0x1c
#define MII_DBG_ADDR 0x1D
#define MII_DBG_DATA 0x1E
-
-/* PHY Control Register */
-#define MII_CR_SPEED_SELECT_MSB 0x0040 /* bits 6,13: 10=1000, 01=100, 00=10 */
-#define MII_CR_COLL_TEST_ENABLE 0x0080 /* Collision test enable */
-#define MII_CR_FULL_DUPLEX 0x0100 /* FDX =1, half duplex =0 */
-#define MII_CR_RESTART_AUTO_NEG 0x0200 /* Restart auto negotiation */
-#define MII_CR_ISOLATE 0x0400 /* Isolate PHY from MII */
-#define MII_CR_POWER_DOWN 0x0800 /* Power down */
-#define MII_CR_AUTO_NEG_EN 0x1000 /* Auto Neg Enable */
-#define MII_CR_SPEED_SELECT_LSB 0x2000 /* bits 6,13: 10=1000, 01=100, 00=10 */
-#define MII_CR_LOOPBACK 0x4000 /* 0 = normal, 1 = loopback */
-#define MII_CR_RESET 0x8000 /* 0 = normal, 1 = PHY reset */
-#define MII_CR_SPEED_MASK 0x2040
-#define MII_CR_SPEED_1000 0x0040
-#define MII_CR_SPEED_100 0x2000
-#define MII_CR_SPEED_10 0x0000
-
-
-/* PHY Status Register */
-#define MII_SR_EXTENDED_CAPS 0x0001 /* Extended register capabilities */
-#define MII_SR_JABBER_DETECT 0x0002 /* Jabber Detected */
-#define MII_SR_LINK_STATUS 0x0004 /* Link Status 1 = link */
-#define MII_SR_AUTONEG_CAPS 0x0008 /* Auto Neg Capable */
-#define MII_SR_REMOTE_FAULT 0x0010 /* Remote Fault Detect */
-#define MII_SR_AUTONEG_COMPLETE 0x0020 /* Auto Neg Complete */
-#define MII_SR_PREAMBLE_SUPPRESS 0x0040 /* Preamble may be suppressed */
-#define MII_SR_EXTENDED_STATUS 0x0100 /* Ext. status info in Reg 0x0F */
-#define MII_SR_100T2_HD_CAPS 0x0200 /* 100T2 Half Duplex Capable */
-#define MII_SR_100T2_FD_CAPS 0x0400 /* 100T2 Full Duplex Capable */
-#define MII_SR_10T_HD_CAPS 0x0800 /* 10T Half Duplex Capable */
-#define MII_SR_10T_FD_CAPS 0x1000 /* 10T Full Duplex Capable */
-#define MII_SR_100X_HD_CAPS 0x2000 /* 100X Half Duplex Capable */
-#define MII_SR_100X_FD_CAPS 0x4000 /* 100X Full Duplex Capable */
-#define MII_SR_100T4_CAPS 0x8000 /* 100T4 Capable */
-
-/* Link partner ability register. */
-#define MII_LPA_SLCT 0x001f /* Same as advertise selector */
-#define MII_LPA_10HALF 0x0020 /* Can do 10mbps half-duplex */
-#define MII_LPA_10FULL 0x0040 /* Can do 10mbps full-duplex */
-#define MII_LPA_100HALF 0x0080 /* Can do 100mbps half-duplex */
-#define MII_LPA_100FULL 0x0100 /* Can do 100mbps full-duplex */
-#define MII_LPA_100BASE4 0x0200 /* 100BASE-T4 */
-#define MII_LPA_PAUSE 0x0400 /* PAUSE */
-#define MII_LPA_ASYPAUSE 0x0800 /* Asymmetrical PAUSE */
-#define MII_LPA_RFAULT 0x2000 /* Link partner faulted */
-#define MII_LPA_LPACK 0x4000 /* Link partner acked us */
-#define MII_LPA_NPAGE 0x8000 /* Next page bit */
-
/* Autoneg Advertisement Register */
-#define MII_AR_SELECTOR_FIELD 0x0001 /* indicates IEEE 802.3 CSMA/CD */
-#define MII_AR_10T_HD_CAPS 0x0020 /* 10T Half Duplex Capable */
-#define MII_AR_10T_FD_CAPS 0x0040 /* 10T Full Duplex Capable */
-#define MII_AR_100TX_HD_CAPS 0x0080 /* 100TX Half Duplex Capable */
-#define MII_AR_100TX_FD_CAPS 0x0100 /* 100TX Full Duplex Capable */
-#define MII_AR_100T4_CAPS 0x0200 /* 100T4 Capable */
-#define MII_AR_PAUSE 0x0400 /* Pause operation desired */
-#define MII_AR_ASM_DIR 0x0800 /* Asymmetric Pause Direction bit */
-#define MII_AR_REMOTE_FAULT 0x2000 /* Remote Fault detected */
-#define MII_AR_NEXT_PAGE 0x8000 /* Next Page ability supported */
-#define MII_AR_SPEED_MASK 0x01E0
-#define MII_AR_DEFAULT_CAP_MASK 0x0DE0
+#define MII_AR_DEFAULT_CAP_MASK 0
/* 1000BASE-T Control Register */
-#define MII_AT001_CR_1000T_HD_CAPS 0x0100 /* Advertise 1000T HD capability */
-#define MII_AT001_CR_1000T_FD_CAPS 0x0200 /* Advertise 1000T FD capability */
-#define MII_AT001_CR_1000T_REPEATER_DTE 0x0400 /* 1=Repeater/switch device port */
-/* 0=DTE device */
-#define MII_AT001_CR_1000T_MS_VALUE 0x0800 /* 1=Configure PHY as Master */
-/* 0=Configure PHY as Slave */
-#define MII_AT001_CR_1000T_MS_ENABLE 0x1000 /* 1=Master/Slave manual config value */
-/* 0=Automatic Master/Slave config */
-#define MII_AT001_CR_1000T_TEST_MODE_NORMAL 0x0000 /* Normal Operation */
-#define MII_AT001_CR_1000T_TEST_MODE_1 0x2000 /* Transmit Waveform test */
-#define MII_AT001_CR_1000T_TEST_MODE_2 0x4000 /* Master Transmit Jitter test */
-#define MII_AT001_CR_1000T_TEST_MODE_3 0x6000 /* Slave Transmit Jitter test */
-#define MII_AT001_CR_1000T_TEST_MODE_4 0x8000 /* Transmitter Distortion test */
-#define MII_AT001_CR_1000T_SPEED_MASK 0x0300
-#define MII_AT001_CR_1000T_DEFAULT_CAP_MASK 0x0300
-
-/* 1000BASE-T Status Register */
-#define MII_AT001_SR_1000T_LP_HD_CAPS 0x0400 /* LP is 1000T HD capable */
-#define MII_AT001_SR_1000T_LP_FD_CAPS 0x0800 /* LP is 1000T FD capable */
-#define MII_AT001_SR_1000T_REMOTE_RX_STATUS 0x1000 /* Remote receiver OK */
-#define MII_AT001_SR_1000T_LOCAL_RX_STATUS 0x2000 /* Local receiver OK */
-#define MII_AT001_SR_1000T_MS_CONFIG_RES 0x4000 /* 1=Local TX is Master, 0=Slave */
-#define MII_AT001_SR_1000T_MS_CONFIG_FAULT 0x8000 /* Master/Slave config fault */
-#define MII_AT001_SR_1000T_REMOTE_RX_STATUS_SHIFT 12
-#define MII_AT001_SR_1000T_LOCAL_RX_STATUS_SHIFT 13
-
-/* Extended Status Register */
-#define MII_AT001_ESR_1000T_HD_CAPS 0x1000 /* 1000T HD capable */
-#define MII_AT001_ESR_1000T_FD_CAPS 0x2000 /* 1000T FD capable */
-#define MII_AT001_ESR_1000X_HD_CAPS 0x4000 /* 1000X HD capable */
-#define MII_AT001_ESR_1000X_FD_CAPS 0x8000 /* 1000X FD capable */
+#define MII_AT001_CR_1000T_SPEED_MASK \
+ (ADVERTISE_1000FULL | ADVERTISE_1000HALF)
+#define MII_AT001_CR_1000T_DEFAULT_CAP_MASK MII_AT001_CR_1000T_SPEED_MASK
/* AT001 PHY Specific Control Register */
#define MII_AT001_PSCR_JABBER_DISABLE 0x0001 /* 1=Jabber Function disabled */
diff --git a/drivers/net/atl1e/atl1e_main.c b/drivers/net/atl1e/atl1e_main.c
index e28f8ba..bf7500c 100644
--- a/drivers/net/atl1e/atl1e_main.c
+++ b/drivers/net/atl1e/atl1e_main.c
@@ -2051,9 +2051,9 @@
atl1e_read_phy_reg(hw, MII_BMSR, (u16 *)&mii_bmsr_data);
atl1e_read_phy_reg(hw, MII_BMSR, (u16 *)&mii_bmsr_data);
- mii_advertise_data = MII_AR_10T_HD_CAPS;
+ mii_advertise_data = ADVERTISE_10HALF;
- if ((atl1e_write_phy_reg(hw, MII_AT001_CR, 0) != 0) ||
+ if ((atl1e_write_phy_reg(hw, MII_CTRL1000, 0) != 0) ||
(atl1e_write_phy_reg(hw,
MII_ADVERTISE, mii_advertise_data) != 0) ||
(atl1e_phy_commit(hw)) != 0) {
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index 99e7652..231aa97 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -8316,7 +8316,7 @@
#endif
};
-static void inline vlan_features_add(struct net_device *dev, unsigned long flags)
+static void inline vlan_features_add(struct net_device *dev, u32 flags)
{
dev->vlan_features |= flags;
}
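
This and the later unsigned long to u32 conversions (bonding, myri10ge, sfc) apparently track the conversion of the netdev feature mask to a 32-bit quantity; helpers that carry feature flags should use the same width to avoid mixed-type arithmetic. A hypothetical companion helper in the same style (the name is illustrative, not from the patch):

	/* Hypothetical mirror of vlan_features_add() above. */
	static inline void vlan_features_clear(struct net_device *dev, u32 flags)
	{
		dev->vlan_features &= ~flags;
	}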
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 163e0b0..1df9f0e 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1372,8 +1372,8 @@
{
struct slave *slave;
struct net_device *bond_dev = bond->dev;
- unsigned long features = bond_dev->features;
- unsigned long vlan_features = 0;
+ u32 features = bond_dev->features;
+ u32 vlan_features = 0;
unsigned short max_hard_header_len = max((u16)ETH_HLEN,
bond_dev->hard_header_len);
int i;
@@ -1400,8 +1400,8 @@
done:
features |= (bond_dev->features & BOND_VLAN_FEATURES);
- bond_dev->features = netdev_fix_features(features, NULL);
- bond_dev->vlan_features = netdev_fix_features(vlan_features, NULL);
+ bond_dev->features = netdev_fix_features(bond_dev, features);
+ bond_dev->vlan_features = netdev_fix_features(bond_dev, vlan_features);
bond_dev->hard_header_len = max_hard_header_len;
return 0;
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 8fd0174..72bb0f6 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -1198,7 +1198,7 @@
bond->dev->name, new_value);
}
out:
- return count;
+ return ret;
}
static DEVICE_ATTR(use_carrier, S_IRUGO | S_IWUSR,
bonding_show_carrier, bonding_store_carrier);
@@ -1595,7 +1595,7 @@
}
}
out:
- return count;
+ return ret;
}
static DEVICE_ATTR(all_slaves_active, S_IRUGO | S_IWUSR,
bonding_show_slaves_active, bonding_store_slaves_active);
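
Both hunks fix sysfs ->store() handlers that fell through to "return count" even when parsing failed, which makes userspace's write() appear to succeed. The intended pattern, sketched here with illustrative names, initializes the return value to count and overwrites it with a negative errno on any failure path:

	static ssize_t bonding_store_example(struct device *d,
					     struct device_attribute *attr,
					     const char *buf, size_t count)
	{
		struct bonding *bond = to_bond(d);	/* helper from bond_sysfs.c */
		int new_value, ret = count;

		if (sscanf(buf, "%d", &new_value) != 1) {
			ret = -EINVAL;		/* surface the error to write() */
			goto out;
		}
		bond->params.example_param = new_value;	/* hypothetical field */
	out:
		return ret;
	}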
diff --git a/drivers/net/e1000e/e1000.h b/drivers/net/e1000e/e1000.h
index e610e13..00bf595 100644
--- a/drivers/net/e1000e/e1000.h
+++ b/drivers/net/e1000e/e1000.h
@@ -364,6 +364,7 @@
/* structs defined in e1000_hw.h */
struct e1000_hw hw;
+ spinlock_t stats64_lock;
struct e1000_hw_stats stats;
struct e1000_phy_info phy_info;
struct e1000_phy_stats phy_stats;
@@ -494,7 +495,9 @@
extern int e1000e_setup_tx_resources(struct e1000_adapter *adapter);
extern void e1000e_free_rx_resources(struct e1000_adapter *adapter);
extern void e1000e_free_tx_resources(struct e1000_adapter *adapter);
-extern void e1000e_update_stats(struct e1000_adapter *adapter);
+extern struct rtnl_link_stats64 *e1000e_get_stats64(struct net_device *netdev,
+ struct rtnl_link_stats64
+ *stats);
extern void e1000e_set_interrupt_capability(struct e1000_adapter *adapter);
extern void e1000e_reset_interrupt_capability(struct e1000_adapter *adapter);
extern void e1000e_get_hw_control(struct e1000_adapter *adapter);
diff --git a/drivers/net/e1000e/ethtool.c b/drivers/net/e1000e/ethtool.c
index fa08b63..daa7fe4 100644
--- a/drivers/net/e1000e/ethtool.c
+++ b/drivers/net/e1000e/ethtool.c
@@ -46,15 +46,15 @@
};
#define E1000_STAT(str, m) { \
- .stat_string = str, \
- .type = E1000_STATS, \
- .sizeof_stat = sizeof(((struct e1000_adapter *)0)->m), \
- .stat_offset = offsetof(struct e1000_adapter, m) }
+ .stat_string = str, \
+ .type = E1000_STATS, \
+ .sizeof_stat = sizeof(((struct e1000_adapter *)0)->m), \
+ .stat_offset = offsetof(struct e1000_adapter, m) }
#define E1000_NETDEV_STAT(str, m) { \
- .stat_string = str, \
- .type = NETDEV_STATS, \
- .sizeof_stat = sizeof(((struct net_device *)0)->m), \
- .stat_offset = offsetof(struct net_device, m) }
+ .stat_string = str, \
+ .type = NETDEV_STATS, \
+ .sizeof_stat = sizeof(((struct rtnl_link_stats64 *)0)->m), \
+ .stat_offset = offsetof(struct rtnl_link_stats64, m) }
static const struct e1000_stats e1000_gstrings_stats[] = {
E1000_STAT("rx_packets", stats.gprc),
@@ -65,21 +65,21 @@
E1000_STAT("tx_broadcast", stats.bptc),
E1000_STAT("rx_multicast", stats.mprc),
E1000_STAT("tx_multicast", stats.mptc),
- E1000_NETDEV_STAT("rx_errors", stats.rx_errors),
- E1000_NETDEV_STAT("tx_errors", stats.tx_errors),
- E1000_NETDEV_STAT("tx_dropped", stats.tx_dropped),
+ E1000_NETDEV_STAT("rx_errors", rx_errors),
+ E1000_NETDEV_STAT("tx_errors", tx_errors),
+ E1000_NETDEV_STAT("tx_dropped", tx_dropped),
E1000_STAT("multicast", stats.mprc),
E1000_STAT("collisions", stats.colc),
- E1000_NETDEV_STAT("rx_length_errors", stats.rx_length_errors),
- E1000_NETDEV_STAT("rx_over_errors", stats.rx_over_errors),
+ E1000_NETDEV_STAT("rx_length_errors", rx_length_errors),
+ E1000_NETDEV_STAT("rx_over_errors", rx_over_errors),
E1000_STAT("rx_crc_errors", stats.crcerrs),
- E1000_NETDEV_STAT("rx_frame_errors", stats.rx_frame_errors),
+ E1000_NETDEV_STAT("rx_frame_errors", rx_frame_errors),
E1000_STAT("rx_no_buffer_count", stats.rnbc),
E1000_STAT("rx_missed_errors", stats.mpc),
E1000_STAT("tx_aborted_errors", stats.ecol),
E1000_STAT("tx_carrier_errors", stats.tncrs),
- E1000_NETDEV_STAT("tx_fifo_errors", stats.tx_fifo_errors),
- E1000_NETDEV_STAT("tx_heartbeat_errors", stats.tx_heartbeat_errors),
+ E1000_NETDEV_STAT("tx_fifo_errors", tx_fifo_errors),
+ E1000_NETDEV_STAT("tx_heartbeat_errors", tx_heartbeat_errors),
E1000_STAT("tx_window_errors", stats.latecol),
E1000_STAT("tx_abort_late_coll", stats.latecol),
E1000_STAT("tx_deferred_ok", stats.dc),
@@ -684,20 +684,13 @@
rx_old = adapter->rx_ring;
err = -ENOMEM;
- tx_ring = kzalloc(sizeof(struct e1000_ring), GFP_KERNEL);
+ tx_ring = kmemdup(tx_old, sizeof(struct e1000_ring), GFP_KERNEL);
if (!tx_ring)
goto err_alloc_tx;
- /*
- * use a memcpy to save any previously configured
- * items like napi structs from having to be
- * reinitialized
- */
- memcpy(tx_ring, tx_old, sizeof(struct e1000_ring));
- rx_ring = kzalloc(sizeof(struct e1000_ring), GFP_KERNEL);
+ rx_ring = kmemdup(rx_old, sizeof(struct e1000_ring), GFP_KERNEL);
if (!rx_ring)
goto err_alloc_rx;
- memcpy(rx_ring, rx_old, sizeof(struct e1000_ring));
adapter->tx_ring = tx_ring;
adapter->rx_ring = rx_ring;
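
kmemdup() allocates and copies in one step, replacing the kzalloc()/memcpy() pair (which zeroed the buffer only to immediately overwrite it). The shape of the call, for reference:

	/* equivalent to: p = kmalloc(len, gfp); if (p) memcpy(p, src, len); */
	new_ring = kmemdup(old_ring, sizeof(struct e1000_ring), GFP_KERNEL);
	if (!new_ring)
		return -ENOMEM;	/* illustrative error handling */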
@@ -1255,7 +1248,6 @@
{
struct e1000_hw *hw = &adapter->hw;
u32 ctrl_reg = 0;
- u32 stat_reg = 0;
u16 phy_reg = 0;
s32 ret_val = 0;
@@ -1363,8 +1355,7 @@
* Set the ILOS bit on the fiber Nic if half duplex link is
* detected.
*/
- stat_reg = er32(STATUS);
- if ((stat_reg & E1000_STATUS_FD) == 0)
+ if ((er32(STATUS) & E1000_STATUS_FD) == 0)
ctrl_reg |= (E1000_CTRL_ILOS | E1000_CTRL_SLU);
}
@@ -1982,14 +1973,15 @@
u64 *data)
{
struct e1000_adapter *adapter = netdev_priv(netdev);
+ struct rtnl_link_stats64 net_stats;
int i;
char *p = NULL;
- e1000e_update_stats(adapter);
+ e1000e_get_stats64(netdev, &net_stats);
for (i = 0; i < E1000_GLOBAL_STATS_LEN; i++) {
switch (e1000_gstrings_stats[i].type) {
case NETDEV_STATS:
- p = (char *) netdev +
+ p = (char *) &net_stats +
e1000_gstrings_stats[i].stat_offset;
break;
case E1000_STATS:
diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c
index fb46974..232b42b 100644
--- a/drivers/net/e1000e/ich8lan.c
+++ b/drivers/net/e1000e/ich8lan.c
@@ -2104,7 +2104,6 @@
{
union ich8_hws_flash_status hsfsts;
s32 ret_val = -E1000_ERR_NVM;
- s32 i = 0;
hsfsts.regval = er16flash(ICH_FLASH_HSFSTS);
@@ -2140,6 +2139,8 @@
ew16flash(ICH_FLASH_HSFSTS, hsfsts.regval);
ret_val = 0;
} else {
+ s32 i = 0;
+
/*
* Otherwise poll for sometime so the current
* cycle has a chance to end before giving up.
diff --git a/drivers/net/e1000e/lib.c b/drivers/net/e1000e/lib.c
index 68aa174..96921de 100644
--- a/drivers/net/e1000e/lib.c
+++ b/drivers/net/e1000e/lib.c
@@ -1978,15 +1978,15 @@
{
struct e1000_nvm_info *nvm = &hw->nvm;
u32 eecd = er32(EECD);
- u16 timeout = 0;
u8 spi_stat_reg;
if (nvm->type == e1000_nvm_eeprom_spi) {
+ u16 timeout = NVM_MAX_RETRY_SPI;
+
/* Clear SK and CS */
eecd &= ~(E1000_EECD_CS | E1000_EECD_SK);
ew32(EECD, eecd);
udelay(1);
- timeout = NVM_MAX_RETRY_SPI;
/*
* Read "Status Register" repeatedly until the LSB is cleared.
diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index 1c18f26..5b916b0 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -900,8 +900,6 @@
adapter->total_rx_bytes += total_rx_bytes;
adapter->total_rx_packets += total_rx_packets;
- netdev->stats.rx_bytes += total_rx_bytes;
- netdev->stats.rx_packets += total_rx_packets;
return cleaned;
}
@@ -1057,8 +1055,6 @@
}
adapter->total_tx_bytes += total_tx_bytes;
adapter->total_tx_packets += total_tx_packets;
- netdev->stats.tx_bytes += total_tx_bytes;
- netdev->stats.tx_packets += total_tx_packets;
return count < tx_ring->count;
}
@@ -1245,8 +1241,6 @@
adapter->total_rx_bytes += total_rx_bytes;
adapter->total_rx_packets += total_rx_packets;
- netdev->stats.rx_bytes += total_rx_bytes;
- netdev->stats.rx_packets += total_rx_packets;
return cleaned;
}
@@ -1426,8 +1420,6 @@
adapter->total_rx_bytes += total_rx_bytes;
adapter->total_rx_packets += total_rx_packets;
- netdev->stats.rx_bytes += total_rx_bytes;
- netdev->stats.rx_packets += total_rx_packets;
return cleaned;
}
@@ -2728,7 +2720,6 @@
{
struct e1000_hw *hw = &adapter->hw;
u32 rctl, rfctl;
- u32 psrctl = 0;
u32 pages = 0;
/* Workaround Si errata on 82579 - configure jumbo frame flow */
@@ -2827,6 +2818,8 @@
adapter->rx_ps_pages = 0;
if (adapter->rx_ps_pages) {
+ u32 psrctl = 0;
+
/* Configure extra packet-split registers */
rfctl = er32(RFCTL);
rfctl |= E1000_RFCTL_EXTEN;
@@ -3028,7 +3021,6 @@
struct netdev_hw_addr *ha;
u8 *mta_list;
u32 rctl;
- int i;
/* Check for Promiscuous and All Multicast modes */
@@ -3051,12 +3043,13 @@
ew32(RCTL, rctl);
if (!netdev_mc_empty(netdev)) {
+ int i = 0;
+
mta_list = kmalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
if (!mta_list)
return;
/* prepare a packed array of only addresses. */
- i = 0;
netdev_for_each_mc_addr(ha, netdev)
memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
@@ -3338,6 +3331,8 @@
return 0;
}
+static void e1000e_update_stats(struct e1000_adapter *adapter);
+
void e1000e_down(struct e1000_adapter *adapter)
{
struct net_device *netdev = adapter->netdev;
@@ -3372,6 +3367,11 @@
del_timer_sync(&adapter->phy_info_timer);
netif_carrier_off(netdev);
+
+ spin_lock(&adapter->stats64_lock);
+ e1000e_update_stats(adapter);
+ spin_unlock(&adapter->stats64_lock);
+
adapter->link_speed = 0;
adapter->link_duplex = 0;
@@ -3413,6 +3413,8 @@
adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
+ spin_lock_init(&adapter->stats64_lock);
+
e1000e_set_interrupt_capability(adapter);
if (e1000_alloc_queues(adapter))
@@ -3886,7 +3888,7 @@
* e1000e_update_stats - Update the board statistics counters
* @adapter: board private structure
**/
-void e1000e_update_stats(struct e1000_adapter *adapter)
+static void e1000e_update_stats(struct e1000_adapter *adapter)
{
struct net_device *netdev = adapter->netdev;
struct e1000_hw *hw = &adapter->hw;
@@ -3998,10 +4000,11 @@
{
struct e1000_hw *hw = &adapter->hw;
struct e1000_phy_regs *phy = &adapter->phy_regs;
- int ret_val;
if ((er32(STATUS) & E1000_STATUS_LU) &&
(adapter->hw.phy.media_type == e1000_media_type_copper)) {
+ int ret_val;
+
ret_val = e1e_rphy(hw, PHY_CONTROL, &phy->bmcr);
ret_val |= e1e_rphy(hw, PHY_STATUS, &phy->bmsr);
ret_val |= e1e_rphy(hw, PHY_AUTONEG_ADV, &phy->advertise);
@@ -4147,7 +4150,6 @@
struct e1000_ring *tx_ring = adapter->tx_ring;
struct e1000_hw *hw = &adapter->hw;
u32 link, tctl;
- int tx_pending = 0;
link = e1000e_has_link(adapter);
if ((netif_carrier_ok(netdev)) && link) {
@@ -4285,7 +4287,9 @@
}
link_up:
+ spin_lock(&adapter->stats64_lock);
e1000e_update_stats(adapter);
+ spin_unlock(&adapter->stats64_lock);
mac->tx_packet_delta = adapter->stats.tpt - adapter->tpt_old;
adapter->tpt_old = adapter->stats.tpt;
@@ -4299,21 +4303,18 @@
e1000e_update_adaptive(&adapter->hw);
- if (!netif_carrier_ok(netdev)) {
- tx_pending = (e1000_desc_unused(tx_ring) + 1 <
- tx_ring->count);
- if (tx_pending) {
- /*
- * We've lost link, so the controller stops DMA,
- * but we've got queued Tx work that's never going
- * to get done, so reset controller to flush Tx.
- * (Do the reset outside of interrupt context).
- */
- adapter->tx_timeout_count++;
- schedule_work(&adapter->reset_task);
- /* return immediately since reset is imminent */
- return;
- }
+ if (!netif_carrier_ok(netdev) &&
+ (e1000_desc_unused(tx_ring) + 1 < tx_ring->count)) {
+ /*
+ * We've lost link, so the controller stops DMA,
+ * but we've got queued Tx work that's never going
+ * to get done, so reset controller to flush Tx.
+ * (Do the reset outside of interrupt context).
+ */
+ adapter->tx_timeout_count++;
+ schedule_work(&adapter->reset_task);
+ /* return immediately since reset is imminent */
+ return;
}
/* Simple mode for Interrupt Throttle Rate (ITR) */
@@ -4384,13 +4385,13 @@
u32 cmd_length = 0;
u16 ipcse = 0, tucse, mss;
u8 ipcss, ipcso, tucss, tucso, hdr_len;
- int err;
if (!skb_is_gso(skb))
return 0;
if (skb_header_cloned(skb)) {
- err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+ int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+
if (err)
return err;
}
@@ -4897,16 +4898,55 @@
}
/**
- * e1000_get_stats - Get System Network Statistics
+ * e1000_get_stats64 - Get System Network Statistics
* @netdev: network interface device structure
+ * @stats: rtnl_link_stats64 pointer
*
* Returns the address of the device statistics structure.
- * The statistics are actually updated from the timer callback.
**/
-static struct net_device_stats *e1000_get_stats(struct net_device *netdev)
+struct rtnl_link_stats64 *e1000e_get_stats64(struct net_device *netdev,
+ struct rtnl_link_stats64 *stats)
{
- /* only return the current stats */
- return &netdev->stats;
+ struct e1000_adapter *adapter = netdev_priv(netdev);
+
+ memset(stats, 0, sizeof(struct rtnl_link_stats64));
+ spin_lock(&adapter->stats64_lock);
+ e1000e_update_stats(adapter);
+ /* Fill out the OS statistics structure */
+ stats->rx_bytes = adapter->stats.gorc;
+ stats->rx_packets = adapter->stats.gprc;
+ stats->tx_bytes = adapter->stats.gotc;
+ stats->tx_packets = adapter->stats.gptc;
+ stats->multicast = adapter->stats.mprc;
+ stats->collisions = adapter->stats.colc;
+
+ /* Rx Errors */
+
+ /*
+ * RLEC on some newer hardware can be incorrect so build
+ * our own version based on RUC and ROC
+ */
+ stats->rx_errors = adapter->stats.rxerrc +
+ adapter->stats.crcerrs + adapter->stats.algnerrc +
+ adapter->stats.ruc + adapter->stats.roc +
+ adapter->stats.cexterr;
+ stats->rx_length_errors = adapter->stats.ruc +
+ adapter->stats.roc;
+ stats->rx_crc_errors = adapter->stats.crcerrs;
+ stats->rx_frame_errors = adapter->stats.algnerrc;
+ stats->rx_missed_errors = adapter->stats.mpc;
+
+ /* Tx Errors */
+ stats->tx_errors = adapter->stats.ecol +
+ adapter->stats.latecol;
+ stats->tx_aborted_errors = adapter->stats.ecol;
+ stats->tx_window_errors = adapter->stats.latecol;
+ stats->tx_carrier_errors = adapter->stats.tncrs;
+
+ /* Tx Dropped needs to be maintained elsewhere */
+
+ spin_unlock(&adapter->stats64_lock);
+ return stats;
}
/**
@@ -5476,9 +5516,10 @@
{
struct net_device *netdev = data;
struct e1000_adapter *adapter = netdev_priv(netdev);
- int vector, msix_irq;
if (adapter->msix_entries) {
+ int vector, msix_irq;
+
vector = 0;
msix_irq = adapter->msix_entries[vector].vector;
disable_irq(msix_irq);
@@ -5675,7 +5716,7 @@
.ndo_open = e1000_open,
.ndo_stop = e1000_close,
.ndo_start_xmit = e1000_xmit_frame,
- .ndo_get_stats = e1000_get_stats,
+ .ndo_get_stats64 = e1000e_get_stats64,
.ndo_set_multicast_list = e1000_set_multi,
.ndo_set_mac_address = e1000_set_mac,
.ndo_change_mtu = e1000_change_mtu,
diff --git a/drivers/net/e1000e/phy.c b/drivers/net/e1000e/phy.c
index 6bea051..6ae31fc 100644
--- a/drivers/net/e1000e/phy.c
+++ b/drivers/net/e1000e/phy.c
@@ -2409,9 +2409,7 @@
s32 e1000e_write_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 data)
{
s32 ret_val;
- u32 page_select = 0;
u32 page = offset >> IGP_PAGE_SHIFT;
- u32 page_shift = 0;
ret_val = hw->phy.ops.acquire(hw);
if (ret_val)
@@ -2427,6 +2425,8 @@
hw->phy.addr = e1000_get_phy_addr_for_bm_page(page, offset);
if (offset > MAX_PHY_MULTI_PAGE_REG) {
+ u32 page_shift, page_select;
+
/*
* Page select is register 31 for phy address 1 and 22 for
* phy address 2 and 3. Page select is shifted only for
@@ -2468,9 +2468,7 @@
s32 e1000e_read_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 *data)
{
s32 ret_val;
- u32 page_select = 0;
u32 page = offset >> IGP_PAGE_SHIFT;
- u32 page_shift = 0;
ret_val = hw->phy.ops.acquire(hw);
if (ret_val)
@@ -2486,6 +2484,8 @@
hw->phy.addr = e1000_get_phy_addr_for_bm_page(page, offset);
if (offset > MAX_PHY_MULTI_PAGE_REG) {
+ u32 page_shift, page_select;
+
/*
* Page select is register 31 for phy address 1 and 22 for
* phy address 2 and 3. Page select is shifted only for
diff --git a/drivers/net/enic/enic.h b/drivers/net/enic/enic.h
index a937f49..ca3be4f 100644
--- a/drivers/net/enic/enic.h
+++ b/drivers/net/enic/enic.h
@@ -32,8 +32,8 @@
#define DRV_NAME "enic"
#define DRV_DESCRIPTION "Cisco VIC Ethernet NIC Driver"
-#define DRV_VERSION "1.4.1.10"
-#define DRV_COPYRIGHT "Copyright 2008-2010 Cisco Systems, Inc"
+#define DRV_VERSION "2.1.1.2"
+#define DRV_COPYRIGHT "Copyright 2008-2011 Cisco Systems, Inc"
#define ENIC_BARS_MAX 6
@@ -49,7 +49,7 @@
void *devid;
};
-#define ENIC_SET_APPLIED (1 << 0)
+#define ENIC_PORT_REQUEST_APPLIED (1 << 0)
#define ENIC_SET_REQUEST (1 << 1)
#define ENIC_SET_NAME (1 << 2)
#define ENIC_SET_INSTANCE (1 << 3)
diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c
index a0af48c..89664c6 100644
--- a/drivers/net/enic/enic_main.c
+++ b/drivers/net/enic/enic_main.c
@@ -1318,18 +1318,20 @@
vic_provinfo_free(vp);
if (err)
return err;
-
- enic->pp.set |= ENIC_SET_APPLIED;
break;
case PORT_REQUEST_DISASSOCIATE:
- enic->pp.set &= ~ENIC_SET_APPLIED;
break;
default:
return -EINVAL;
}
+	/* Set flag to indicate that the port assoc/disassoc
+	 * request has been sent out to the firmware
+	 */
+ enic->pp.set |= ENIC_PORT_REQUEST_APPLIED;
+
return 0;

}
@@ -1411,7 +1413,7 @@
int err, error, done;
u16 response = PORT_PROFILE_RESPONSE_SUCCESS;
- if (!(enic->pp.set & ENIC_SET_APPLIED))
+ if (!(enic->pp.set & ENIC_PORT_REQUEST_APPLIED))
return -ENODATA;
err = enic_dev_init_done(enic, &done, &error);
diff --git a/drivers/net/igb/e1000_82575.c b/drivers/net/igb/e1000_82575.c
index 0a2368f..c1552b6 100644
--- a/drivers/net/igb/e1000_82575.c
+++ b/drivers/net/igb/e1000_82575.c
@@ -129,6 +129,7 @@
break;
case E1000_DEV_ID_82580_COPPER:
case E1000_DEV_ID_82580_FIBER:
+ case E1000_DEV_ID_82580_QUAD_FIBER:
case E1000_DEV_ID_82580_SERDES:
case E1000_DEV_ID_82580_SGMII:
case E1000_DEV_ID_82580_COPPER_DUAL:
diff --git a/drivers/net/igb/e1000_hw.h b/drivers/net/igb/e1000_hw.h
index e2638af..281324e 100644
--- a/drivers/net/igb/e1000_hw.h
+++ b/drivers/net/igb/e1000_hw.h
@@ -54,6 +54,7 @@
#define E1000_DEV_ID_82580_SERDES 0x1510
#define E1000_DEV_ID_82580_SGMII 0x1511
#define E1000_DEV_ID_82580_COPPER_DUAL 0x1516
+#define E1000_DEV_ID_82580_QUAD_FIBER 0x1527
#define E1000_DEV_ID_DH89XXCC_SGMII 0x0438
#define E1000_DEV_ID_DH89XXCC_SERDES 0x043A
#define E1000_DEV_ID_DH89XXCC_BACKPLANE 0x043C
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index 58c665b..200cc32 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -68,6 +68,7 @@
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
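
Adding a new variant touches two places: the MAC-type switch in e1000_82575.c (so init code treats 0x1527 as an 82580) and the PCI ID table here (so the driver binds at probe time). PCI_VDEVICE() is shorthand that fills in the vendor/device pair and wildcards the subsystem IDs:

	/* { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 }
	 * expands, roughly, to:
	 */
	{ PCI_VENDOR_ID_INTEL, 0x1527, PCI_ANY_ID, PCI_ANY_ID, 0, 0, board_82575 },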
diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index ea5cfe2..a7f2eed 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -253,7 +253,7 @@
unsigned long serial_number;
int vendor_specific_offset;
int fw_multicast_support;
- unsigned long features;
+ u32 features;
u32 max_tso6;
u32 read_dma;
u32 write_dma;
@@ -1776,7 +1776,7 @@
static int myri10ge_set_tso(struct net_device *netdev, u32 tso_enabled)
{
struct myri10ge_priv *mgp = netdev_priv(netdev);
- unsigned long flags = mgp->features & (NETIF_F_TSO6 | NETIF_F_TSO);
+ u32 flags = mgp->features & (NETIF_F_TSO6 | NETIF_F_TSO);
if (tso_enabled)
netdev->features |= flags;
diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
index c7a6c44..9f6d670 100644
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -592,8 +592,8 @@
ppp_release(NULL, file);
err = 0;
} else
- printk(KERN_DEBUG "PPPIOCDETACH file->f_count=%ld\n",
- atomic_long_read(&file->f_count));
+ pr_warn("PPPIOCDETACH file->f_count=%ld\n",
+ atomic_long_read(&file->f_count));
mutex_unlock(&ppp_mutex);
return err;
}
@@ -630,7 +630,7 @@
if (pf->kind != INTERFACE) {
/* can't happen */
- printk(KERN_ERR "PPP: not interface or channel??\n");
+ pr_err("PPP: not interface or channel??\n");
return -EINVAL;
}
@@ -704,7 +704,8 @@
}
vj = slhc_init(val2+1, val+1);
if (!vj) {
- printk(KERN_ERR "PPP: no memory (VJ compressor)\n");
+ netdev_err(ppp->dev,
+ "PPP: no memory (VJ compressor)\n");
err = -ENOMEM;
break;
}
@@ -898,17 +899,17 @@
{
int err;
- printk(KERN_INFO "PPP generic driver version " PPP_VERSION "\n");
+ pr_info("PPP generic driver version " PPP_VERSION "\n");
err = register_pernet_device(&ppp_net_ops);
if (err) {
- printk(KERN_ERR "failed to register PPP pernet device (%d)\n", err);
+ pr_err("failed to register PPP pernet device (%d)\n", err);
goto out;
}
err = register_chrdev(PPP_MAJOR, "ppp", &ppp_device_fops);
if (err) {
- printk(KERN_ERR "failed to register PPP device (%d)\n", err);
+ pr_err("failed to register PPP device (%d)\n", err);
goto out_net;
}
@@ -1078,7 +1079,7 @@
new_skb = alloc_skb(new_skb_size, GFP_ATOMIC);
if (!new_skb) {
if (net_ratelimit())
- printk(KERN_ERR "PPP: no memory (comp pkt)\n");
+ netdev_err(ppp->dev, "PPP: no memory (comp pkt)\n");
return NULL;
}
if (ppp->dev->hard_header_len > PPP_HDRLEN)
@@ -1108,7 +1109,7 @@
* the same number.
*/
if (net_ratelimit())
- printk(KERN_ERR "ppp: compressor dropped pkt\n");
+ netdev_err(ppp->dev, "ppp: compressor dropped pkt\n");
kfree_skb(skb);
kfree_skb(new_skb);
new_skb = NULL;
@@ -1138,7 +1139,9 @@
if (ppp->pass_filter &&
sk_run_filter(skb, ppp->pass_filter) == 0) {
if (ppp->debug & 1)
- printk(KERN_DEBUG "PPP: outbound frame not passed\n");
+ netdev_printk(KERN_DEBUG, ppp->dev,
+ "PPP: outbound frame "
+ "not passed\n");
kfree_skb(skb);
return;
}
@@ -1164,7 +1167,7 @@
new_skb = alloc_skb(skb->len + ppp->dev->hard_header_len - 2,
GFP_ATOMIC);
if (!new_skb) {
- printk(KERN_ERR "PPP: no memory (VJ comp pkt)\n");
+ netdev_err(ppp->dev, "PPP: no memory (VJ comp pkt)\n");
goto drop;
}
skb_reserve(new_skb, ppp->dev->hard_header_len - 2);
@@ -1202,7 +1205,9 @@
proto != PPP_LCP && proto != PPP_CCP) {
if (!(ppp->flags & SC_CCP_UP) && (ppp->flags & SC_MUST_COMP)) {
if (net_ratelimit())
- printk(KERN_ERR "ppp: compression required but down - pkt dropped.\n");
+ netdev_err(ppp->dev,
+ "ppp: compression required but "
+ "down - pkt dropped.\n");
goto drop;
}
skb = pad_compress_skb(ppp, skb);
@@ -1505,7 +1510,7 @@
noskb:
spin_unlock_bh(&pch->downl);
if (ppp->debug & 1)
- printk(KERN_ERR "PPP: no memory (fragment)\n");
+ netdev_err(ppp->dev, "PPP: no memory (fragment)\n");
++ppp->dev->stats.tx_errors;
++ppp->nxseq;
return 1; /* abandon the frame */
@@ -1686,7 +1691,8 @@
/* copy to a new sk_buff with more tailroom */
ns = dev_alloc_skb(skb->len + 128);
if (!ns) {
- printk(KERN_ERR"PPP: no memory (VJ decomp)\n");
+ netdev_err(ppp->dev, "PPP: no memory "
+ "(VJ decomp)\n");
goto err;
}
skb_reserve(ns, 2);
@@ -1699,7 +1705,8 @@
len = slhc_uncompress(ppp->vj, skb->data + 2, skb->len - 2);
if (len <= 0) {
- printk(KERN_DEBUG "PPP: VJ decompression error\n");
+ netdev_printk(KERN_DEBUG, ppp->dev,
+ "PPP: VJ decompression error\n");
goto err;
}
len += 2;
@@ -1721,7 +1728,7 @@
goto err;
if (slhc_remember(ppp->vj, skb->data + 2, skb->len - 2) <= 0) {
- printk(KERN_ERR "PPP: VJ uncompressed error\n");
+ netdev_err(ppp->dev, "PPP: VJ uncompressed error\n");
goto err;
}
proto = PPP_IP;
@@ -1762,8 +1769,9 @@
if (ppp->pass_filter &&
sk_run_filter(skb, ppp->pass_filter) == 0) {
if (ppp->debug & 1)
- printk(KERN_DEBUG "PPP: inbound frame "
- "not passed\n");
+ netdev_printk(KERN_DEBUG, ppp->dev,
+ "PPP: inbound frame "
+ "not passed\n");
kfree_skb(skb);
return;
}
@@ -1821,7 +1829,8 @@
ns = dev_alloc_skb(obuff_size);
if (!ns) {
- printk(KERN_ERR "ppp_decompress_frame: no memory\n");
+ netdev_err(ppp->dev, "ppp_decompress_frame: "
+ "no memory\n");
goto err;
}
/* the decompressor still expects the A/C bytes in the hdr */
@@ -1989,7 +1998,7 @@
u32 seq = ppp->nextseq;
u32 minseq = ppp->minseq;
struct sk_buff_head *list = &ppp->mrq;
- struct sk_buff *p, *next;
+ struct sk_buff *p, *tmp;
struct sk_buff *head, *tail;
struct sk_buff *skb = NULL;
int lost = 0, len = 0;
@@ -1998,13 +2007,15 @@
return NULL;
head = list->next;
tail = NULL;
- for (p = head; p != (struct sk_buff *) list; p = next) {
- next = p->next;
+ skb_queue_walk_safe(list, p, tmp) {
+ again:
if (seq_before(PPP_MP_CB(p)->sequence, seq)) {
/* this can't happen, anyway ignore the skb */
- printk(KERN_ERR "ppp_mp_reconstruct bad seq %u < %u\n",
- PPP_MP_CB(p)->sequence, seq);
- head = next;
+ netdev_err(ppp->dev, "ppp_mp_reconstruct bad "
+ "seq %u < %u\n",
+ PPP_MP_CB(p)->sequence, seq);
+ __skb_unlink(p, list);
+ kfree_skb(p);
continue;
}
if (PPP_MP_CB(p)->sequence != seq) {
@@ -2016,8 +2027,7 @@
lost = 1;
seq = seq_before(minseq, PPP_MP_CB(p)->sequence)?
minseq + 1: PPP_MP_CB(p)->sequence;
- next = p;
- continue;
+ goto again;
}
/*
@@ -2042,17 +2052,9 @@
(PPP_MP_CB(head)->BEbits & B)) {
if (len > ppp->mrru + 2) {
++ppp->dev->stats.rx_length_errors;
- printk(KERN_DEBUG "PPP: reconstructed packet"
- " is too long (%d)\n", len);
- } else if (p == head) {
- /* fragment is complete packet - reuse skb */
- tail = p;
- skb = skb_get(p);
- break;
- } else if ((skb = dev_alloc_skb(len)) == NULL) {
- ++ppp->dev->stats.rx_missed_errors;
- printk(KERN_DEBUG "PPP: no memory for "
- "reconstructed packet");
+ netdev_printk(KERN_DEBUG, ppp->dev,
+ "PPP: reconstructed packet"
+ " is too long (%d)\n", len);
} else {
tail = p;
break;
@@ -2065,9 +2067,17 @@
* and we haven't found a complete valid packet yet,
* we can discard up to and including this fragment.
*/
- if (PPP_MP_CB(p)->BEbits & E)
- head = next;
+ if (PPP_MP_CB(p)->BEbits & E) {
+ struct sk_buff *tmp2;
+ skb_queue_reverse_walk_from_safe(list, p, tmp2) {
+ __skb_unlink(p, list);
+ kfree_skb(p);
+ }
+ head = skb_peek(list);
+ if (!head)
+ break;
+ }
++seq;
}
@@ -2077,26 +2087,37 @@
signal a receive error. */
if (PPP_MP_CB(head)->sequence != ppp->nextseq) {
if (ppp->debug & 1)
- printk(KERN_DEBUG " missed pkts %u..%u\n",
- ppp->nextseq,
- PPP_MP_CB(head)->sequence-1);
+ netdev_printk(KERN_DEBUG, ppp->dev,
+ " missed pkts %u..%u\n",
+ ppp->nextseq,
+ PPP_MP_CB(head)->sequence-1);
++ppp->dev->stats.rx_dropped;
ppp_receive_error(ppp);
}
- if (head != tail)
- /* copy to a single skb */
- for (p = head; p != tail->next; p = p->next)
- skb_copy_bits(p, 0, skb_put(skb, p->len), p->len);
- ppp->nextseq = PPP_MP_CB(tail)->sequence + 1;
- head = tail->next;
- }
+ skb = head;
+ if (head != tail) {
+ struct sk_buff **fragpp = &skb_shinfo(skb)->frag_list;
+ p = skb_queue_next(list, head);
+ __skb_unlink(skb, list);
+ skb_queue_walk_from_safe(list, p, tmp) {
+ __skb_unlink(p, list);
+ *fragpp = p;
+ p->next = NULL;
+ fragpp = &p->next;
- /* Discard all the skbuffs that we have copied the data out of
- or that we can't use. */
- while ((p = list->next) != head) {
- __skb_unlink(p, list);
- kfree_skb(p);
+ skb->len += p->len;
+ skb->data_len += p->len;
+ skb->truesize += p->len;
+
+ if (p == tail)
+ break;
+ }
+ } else {
+ __skb_unlink(skb, list);
+ }
+
+ ppp->nextseq = PPP_MP_CB(tail)->sequence + 1;
}
return skb;
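
The rewritten reconstruction no longer allocates a fresh skb and copies every fragment's payload into it; instead the head fragment is unlinked and the remaining fragments are chained onto skb_shinfo(head)->frag_list, with len, data_len and truesize adjusted to account for the attached buffers. A hedged restatement of the chaining idiom in isolation:

	#include <linux/skbuff.h>

	/* Append one buffer to head's frag_list without copying payload. */
	static void chain_fragment(struct sk_buff *head, struct sk_buff *frag)
	{
		struct sk_buff **fragpp = &skb_shinfo(head)->frag_list;

		while (*fragpp)			/* walk to the end of the chain */
			fragpp = &(*fragpp)->next;
		frag->next = NULL;
		*fragpp = frag;

		head->len      += frag->len;	/* total, including fragments */
		head->data_len += frag->len;	/* bytes outside the linear area */
		head->truesize += frag->len;	/* memory accounting */
	}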
@@ -2617,8 +2638,8 @@
ret = register_netdev(dev);
if (ret != 0) {
unit_put(&pn->units_idr, unit);
- printk(KERN_ERR "PPP: couldn't register device %s (%d)\n",
- dev->name, ret);
+ netdev_err(ppp->dev, "PPP: couldn't register device %s (%d)\n",
+ dev->name, ret);
goto out2;
}
@@ -2690,9 +2711,9 @@
if (!ppp->file.dead || ppp->n_channels) {
/* "can't happen" */
- printk(KERN_ERR "ppp: destroying ppp struct %p but dead=%d "
- "n_channels=%d !\n", ppp, ppp->file.dead,
- ppp->n_channels);
+ netdev_err(ppp->dev, "ppp: destroying ppp struct %p "
+ "but dead=%d n_channels=%d !\n",
+ ppp, ppp->file.dead, ppp->n_channels);
return;
}
@@ -2834,8 +2855,7 @@
if (!pch->file.dead) {
/* "can't happen" */
- printk(KERN_ERR "ppp: destroying undead channel %p !\n",
- pch);
+ pr_err("ppp: destroying undead channel %p !\n", pch);
return;
}
skb_queue_purge(&pch->file.xq);
@@ -2847,7 +2867,7 @@
{
/* should never happen */
if (atomic_read(&ppp_unit_count) || atomic_read(&channel_count))
- printk(KERN_ERR "PPP: removing module but units remain!\n");
+ pr_err("PPP: removing module but units remain!\n");
unregister_chrdev(PPP_MAJOR, "ppp");
device_destroy(ppp_class, MKDEV(PPP_MAJOR, 0));
class_destroy(ppp_class);
@@ -2865,7 +2885,7 @@
again:
if (!idr_pre_get(p, GFP_KERNEL)) {
- printk(KERN_ERR "PPP: No free memory for idr\n");
+ pr_err("PPP: No free memory for idr\n");
return -ENOMEM;
}
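
The logging conversion follows one rule throughout: where a struct net_device is in scope, use netdev_err()/netdev_printk() so each message is prefixed with the interface name; in paths with no device at hand (module init/exit, channel teardown), fall back to the pr_err()/pr_warn()/pr_info() family. Roughly:

	/* With a device: message comes out as e.g. "ppp0: no memory ..." */
	netdev_err(ppp->dev, "PPP: no memory (fragment)\n");

	/* Without one: plain message, optionally prefixed via pr_fmt() */
	pr_err("PPP: removing module but units remain!\n");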
diff --git a/drivers/net/sfc/ethtool.c b/drivers/net/sfc/ethtool.c
index 0e8bb19..713969a 100644
--- a/drivers/net/sfc/ethtool.c
+++ b/drivers/net/sfc/ethtool.c
@@ -502,7 +502,7 @@
static int efx_ethtool_set_tso(struct net_device *net_dev, u32 enable)
{
struct efx_nic *efx __attribute__ ((unused)) = netdev_priv(net_dev);
- unsigned long features;
+ u32 features;
features = NETIF_F_TSO;
if (efx->type->offload_features & NETIF_F_V6_CSUM)
@@ -519,7 +519,7 @@
static int efx_ethtool_set_tx_csum(struct net_device *net_dev, u32 enable)
{
struct efx_nic *efx = netdev_priv(net_dev);
- unsigned long features = efx->type->offload_features & NETIF_F_ALL_CSUM;
+ u32 features = efx->type->offload_features & NETIF_F_ALL_CSUM;
if (enable)
net_dev->features |= features;
diff --git a/drivers/net/sfc/net_driver.h b/drivers/net/sfc/net_driver.h
index 28df866..c652702 100644
--- a/drivers/net/sfc/net_driver.h
+++ b/drivers/net/sfc/net_driver.h
@@ -906,7 +906,7 @@
unsigned int phys_addr_channels;
unsigned int tx_dc_base;
unsigned int rx_dc_base;
- unsigned long offload_features;
+ u32 offload_features;
u32 reset_world_flags;
};
diff --git a/drivers/net/smc91x.c b/drivers/net/smc91x.c
index 726df61..43654a3 100644
--- a/drivers/net/smc91x.c
+++ b/drivers/net/smc91x.c
@@ -81,6 +81,7 @@
#include <linux/ethtool.h>
#include <linux/mii.h>
#include <linux/workqueue.h>
+#include <linux/of.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
@@ -2394,6 +2395,15 @@
return 0;
}
+#ifdef CONFIG_OF
+static const struct of_device_id smc91x_match[] = {
+ { .compatible = "smsc,lan91c94", },
+ { .compatible = "smsc,lan91c111", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, smc91x_match);
+#endif
+
static struct dev_pm_ops smc_drv_pm_ops = {
.suspend = smc_drv_suspend,
.resume = smc_drv_resume,
@@ -2406,6 +2416,9 @@
.name = CARDNAME,
.owner = THIS_MODULE,
.pm = &smc_drv_pm_ops,
+#ifdef CONFIG_OF
+ .of_match_table = smc91x_match,
+#endif
},
};
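
With the table above in place, the OF core can bind this platform driver to firmware nodes whose compatible string matches one of the entries, and MODULE_DEVICE_TABLE(of, ...) exports the list so module autoloading works. If probe code ever needs to know which entry matched, the usual call is of_match_device() (the check below is illustrative, not in the patch):

	const struct of_device_id *match;

	match = of_match_device(smc91x_match, &pdev->dev);
	if (match)
		dev_info(&pdev->dev, "bound via compatible \"%s\"\n",
			 match->compatible);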
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 93b32d3..cc06952 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -4,7 +4,7 @@
* Copyright (C) 2001, 2002, 2003, 2004 David S. Miller (davem@redhat.com)
* Copyright (C) 2001, 2002, 2003 Jeff Garzik (jgarzik@pobox.com)
* Copyright (C) 2004 Sun Microsystems Inc.
- * Copyright (C) 2005-2010 Broadcom Corporation.
+ * Copyright (C) 2005-2011 Broadcom Corporation.
*
* Firmware is:
* Derived from proprietary unpublished source code,
@@ -64,10 +64,10 @@
#define DRV_MODULE_NAME "tg3"
#define TG3_MAJ_NUM 3
-#define TG3_MIN_NUM 116
+#define TG3_MIN_NUM 117
#define DRV_MODULE_VERSION \
__stringify(TG3_MAJ_NUM) "." __stringify(TG3_MIN_NUM)
-#define DRV_MODULE_RELDATE "December 3, 2010"
+#define DRV_MODULE_RELDATE "January 25, 2011"
#define TG3_DEF_MAC_MODE 0
#define TG3_DEF_RX_MODE 0
@@ -1776,9 +1776,29 @@
tg3_phy_cl45_read(tp, MDIO_MMD_AN,
TG3_CL45_D7_EEERES_STAT, &val);
- if (val == TG3_CL45_D7_EEERES_STAT_LP_1000T ||
- val == TG3_CL45_D7_EEERES_STAT_LP_100TX)
+ switch (val) {
+ case TG3_CL45_D7_EEERES_STAT_LP_1000T:
+ switch (GET_ASIC_REV(tp->pci_chip_rev_id)) {
+ case ASIC_REV_5717:
+ case ASIC_REV_5719:
+ case ASIC_REV_57765:
+ /* Enable SM_DSP clock and tx 6dB coding. */
+ val = MII_TG3_AUXCTL_SHDWSEL_AUXCTL |
+ MII_TG3_AUXCTL_ACTL_SMDSP_ENA |
+ MII_TG3_AUXCTL_ACTL_TX_6DB;
+ tg3_writephy(tp, MII_TG3_AUX_CTRL, val);
+
+ tg3_phydsp_write(tp, MII_TG3_DSP_TAP26, 0x0000);
+
+ /* Turn off SM_DSP clock. */
+ val = MII_TG3_AUXCTL_SHDWSEL_AUXCTL |
+ MII_TG3_AUXCTL_ACTL_TX_6DB;
+ tg3_writephy(tp, MII_TG3_AUX_CTRL, val);
+ }
+ /* Fallthrough */
+ case TG3_CL45_D7_EEERES_STAT_LP_100TX:
tp->setlpicnt = 2;
+ }
}
if (!tp->setlpicnt) {
@@ -2968,11 +2988,19 @@
MII_TG3_AUXCTL_ACTL_TX_6DB;
tg3_writephy(tp, MII_TG3_AUX_CTRL, val);
- if ((GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5717 ||
- GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_57765) &&
- !tg3_phydsp_read(tp, MII_TG3_DSP_CH34TP2, &val))
- tg3_phydsp_write(tp, MII_TG3_DSP_CH34TP2,
- val | MII_TG3_DSP_CH34TP2_HIBW01);
+ switch (GET_ASIC_REV(tp->pci_chip_rev_id)) {
+ case ASIC_REV_5717:
+ case ASIC_REV_57765:
+ if (!tg3_phydsp_read(tp, MII_TG3_DSP_CH34TP2, &val))
+ tg3_phydsp_write(tp, MII_TG3_DSP_CH34TP2, val |
+ MII_TG3_DSP_CH34TP2_HIBW01);
+ /* Fall through */
+ case ASIC_REV_5719:
+ val = MII_TG3_DSP_TAP26_ALNOKO |
+ MII_TG3_DSP_TAP26_RMRXSTO |
+ MII_TG3_DSP_TAP26_OPCSINPT;
+ tg3_phydsp_write(tp, MII_TG3_DSP_TAP26, val);
+ }
val = 0;
if (tp->link_config.autoneg == AUTONEG_ENABLE) {
@@ -7801,7 +7829,7 @@
TG3_CPMU_DBTMR1_LNKIDLE_2047US);
tw32_f(TG3_CPMU_EEE_DBTMR2,
- TG3_CPMU_DBTMR1_APE_TX_2047US |
+ TG3_CPMU_DBTMR2_APE_TX_2047US |
TG3_CPMU_DBTMR2_TXIDXEQ_2047US);
}
@@ -8075,8 +8103,9 @@
/* Program the jumbo buffer descriptor ring control
* blocks on those devices that have them.
*/
- if ((tp->tg3_flags & TG3_FLAG_JUMBO_CAPABLE) &&
- !(tp->tg3_flags2 & TG3_FLG2_5780_CLASS)) {
+ if (tp->pci_chip_rev_id == CHIPREV_ID_5719_A0 ||
+ ((tp->tg3_flags & TG3_FLAG_JUMBO_CAPABLE) &&
+ !(tp->tg3_flags2 & TG3_FLG2_5780_CLASS))) {
/* Setup replenish threshold. */
tw32(RCVBDI_JUMBO_THRESH, tp->rx_jumbo_pending / 8);
@@ -8194,8 +8223,12 @@
(tp->tg3_flags3 & TG3_FLG3_5717_PLUS)) {
val = tr32(TG3_RDMA_RSRVCTRL_REG);
if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719) {
- val &= ~TG3_RDMA_RSRVCTRL_TXMRGN_MASK;
- val |= TG3_RDMA_RSRVCTRL_TXMRGN_320B;
+ val &= ~(TG3_RDMA_RSRVCTRL_TXMRGN_MASK |
+ TG3_RDMA_RSRVCTRL_FIFO_LWM_MASK |
+ TG3_RDMA_RSRVCTRL_FIFO_HWM_MASK);
+ val |= TG3_RDMA_RSRVCTRL_TXMRGN_320B |
+ TG3_RDMA_RSRVCTRL_FIFO_LWM_1_5K |
+ TG3_RDMA_RSRVCTRL_FIFO_HWM_1_5K;
}
tw32(TG3_RDMA_RSRVCTRL_REG,
val | TG3_RDMA_RSRVCTRL_FIFO_OFLW_FIX);
@@ -8317,7 +8350,8 @@
tw32_f(GRC_LOCAL_CTRL, tp->grc_local_ctrl);
udelay(100);
- if (tp->tg3_flags2 & TG3_FLG2_USING_MSIX) {
+ if ((tp->tg3_flags2 & TG3_FLG2_USING_MSIX) &&
+ tp->irq_cnt > 1) {
val = tr32(MSGINT_MODE);
val |= MSGINT_MODE_MULTIVEC_EN | MSGINT_MODE_ENABLE;
tw32(MSGINT_MODE, val);
@@ -9057,7 +9091,8 @@
if (tp->tg3_flags2 & TG3_FLG2_USING_MSI_OR_MSIX) {
u32 msi_mode = tr32(MSGINT_MODE);
- if (tp->tg3_flags2 & TG3_FLG2_USING_MSIX)
+ if ((tp->tg3_flags2 & TG3_FLG2_USING_MSIX) &&
+ tp->irq_cnt > 1)
msi_mode |= MSGINT_MODE_MULTIVEC_EN;
tw32(MSGINT_MODE, msi_mode | MSGINT_MODE_ENABLE);
}
@@ -10833,13 +10868,16 @@
if (loopback_mode == TG3_MAC_LOOPBACK) {
/* HW errata - mac loopback fails in some cases on 5780.
* Normal traffic and PHY loopback are not affected by
- * errata.
+ * errata. Also, the MAC loopback test is deprecated for
+ * all newer ASIC revisions.
*/
- if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5780)
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5780 ||
+ (tp->tg3_flags & TG3_FLAG_CPMU_PRESENT))
return 0;
- mac_mode = (tp->mac_mode & ~MAC_MODE_PORT_MODE_MASK) |
- MAC_MODE_PORT_INT_LPBACK;
+ mac_mode = tp->mac_mode &
+ ~(MAC_MODE_PORT_MODE_MASK | MAC_MODE_HALF_DUPLEX);
+ mac_mode |= MAC_MODE_PORT_INT_LPBACK;
if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS))
mac_mode |= MAC_MODE_LINK_POLARITY;
if (tp->phy_flags & TG3_PHYFLG_10_100_ONLY)
@@ -10861,7 +10899,8 @@
tg3_writephy(tp, MII_BMCR, val);
udelay(40);
- mac_mode = tp->mac_mode & ~MAC_MODE_PORT_MODE_MASK;
+ mac_mode = tp->mac_mode &
+ ~(MAC_MODE_PORT_MODE_MASK | MAC_MODE_HALF_DUPLEX);
if (tp->phy_flags & TG3_PHYFLG_IS_FET) {
tg3_writephy(tp, MII_TG3_FET_PTEST,
MII_TG3_FET_PTEST_FRC_TX_LINK |
@@ -10889,6 +10928,13 @@
MII_TG3_EXT_CTRL_LNK3_LED_MODE);
}
tw32(MAC_MODE, mac_mode);
+
+ /* Wait for link */
+ for (i = 0; i < 100; i++) {
+ if (tr32(MAC_TX_STATUS) & TX_STATUS_LINK_UP)
+ break;
+ mdelay(1);
+ }
} else {
return -EINVAL;
}
@@ -10995,14 +11041,19 @@
static int tg3_test_loopback(struct tg3 *tp)
{
int err = 0;
- u32 cpmuctrl = 0;
+ u32 eee_cap, cpmuctrl = 0;
if (!netif_running(tp->dev))
return TG3_LOOPBACK_FAILED;
+ eee_cap = tp->phy_flags & TG3_PHYFLG_EEE_CAP;
+ tp->phy_flags &= ~TG3_PHYFLG_EEE_CAP;
+
err = tg3_reset_hw(tp, 1);
- if (err)
- return TG3_LOOPBACK_FAILED;
+ if (err) {
+ err = TG3_LOOPBACK_FAILED;
+ goto done;
+ }
/* Turn off gphy autopowerdown. */
if (tp->phy_flags & TG3_PHYFLG_ENABLE_APD)
@@ -11022,8 +11073,10 @@
udelay(10);
}
- if (status != CPMU_MUTEX_GNT_DRIVER)
- return TG3_LOOPBACK_FAILED;
+ if (status != CPMU_MUTEX_GNT_DRIVER) {
+ err = TG3_LOOPBACK_FAILED;
+ goto done;
+ }
/* Turn off link-based power management. */
cpmuctrl = tr32(TG3_CPMU_CTRL);
@@ -11052,6 +11105,9 @@
if (tp->phy_flags & TG3_PHYFLG_ENABLE_APD)
tg3_phy_toggle_apd(tp, true);
+done:
+ tp->phy_flags |= eee_cap;
+
return err;
}
@@ -11158,7 +11214,9 @@
if (tp->phy_flags & TG3_PHYFLG_PHY_SERDES)
break; /* We have no PHY */
- if (tp->phy_flags & TG3_PHYFLG_IS_LOW_POWER)
+ if ((tp->phy_flags & TG3_PHYFLG_IS_LOW_POWER) ||
+ ((tp->tg3_flags & TG3_FLAG_ENABLE_ASF) &&
+ !netif_running(dev)))
return -EAGAIN;
spin_lock_bh(&tp->lock);
@@ -11174,7 +11232,9 @@
if (tp->phy_flags & TG3_PHYFLG_PHY_SERDES)
break; /* We have no PHY */
- if (tp->phy_flags & TG3_PHYFLG_IS_LOW_POWER)
+ if ((tp->phy_flags & TG3_PHYFLG_IS_LOW_POWER) ||
+ ((tp->tg3_flags & TG3_FLAG_ENABLE_ASF) &&
+ !netif_running(dev)))
return -EAGAIN;
spin_lock_bh(&tp->lock);
@@ -13258,7 +13318,9 @@
}
/* Determine TSO capabilities */
- if (tp->tg3_flags3 & TG3_FLG3_5717_PLUS)
+ if (tp->pci_chip_rev_id == CHIPREV_ID_5719_A0)
+ ; /* Do nothing. HW bug. */
+ else if (tp->tg3_flags3 & TG3_FLG3_5717_PLUS)
tp->tg3_flags2 |= TG3_FLG2_HW_TSO_3;
else if ((tp->tg3_flags3 & TG3_FLG3_5755_PLUS) ||
GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906)
@@ -13309,7 +13371,8 @@
tp->tg3_flags3 |= TG3_FLG3_40BIT_DMA_LIMIT_BUG;
}
- if (tp->tg3_flags3 & TG3_FLG3_5717_PLUS)
+ if ((tp->tg3_flags3 & TG3_FLG3_5717_PLUS) &&
+ tp->pci_chip_rev_id != CHIPREV_ID_5719_A0)
tp->tg3_flags3 |= TG3_FLG3_USE_JUMBO_BDFLAG;
if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS) ||
@@ -13327,42 +13390,8 @@
tp->tg3_flags2 |= TG3_FLG2_PCI_EXPRESS;
tp->pcie_readrq = 4096;
- if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719) {
- u16 word;
-
- pci_read_config_word(tp->pdev,
- tp->pcie_cap + PCI_EXP_LNKSTA,
- &word);
- switch (word & PCI_EXP_LNKSTA_CLS) {
- case PCI_EXP_LNKSTA_CLS_2_5GB:
- word &= PCI_EXP_LNKSTA_NLW;
- word >>= PCI_EXP_LNKSTA_NLW_SHIFT;
- switch (word) {
- case 2:
- tp->pcie_readrq = 2048;
- break;
- case 4:
- tp->pcie_readrq = 1024;
- break;
- }
- break;
-
- case PCI_EXP_LNKSTA_CLS_5_0GB:
- word &= PCI_EXP_LNKSTA_NLW;
- word >>= PCI_EXP_LNKSTA_NLW_SHIFT;
- switch (word) {
- case 1:
- tp->pcie_readrq = 2048;
- break;
- case 2:
- tp->pcie_readrq = 1024;
- break;
- case 4:
- tp->pcie_readrq = 512;
- break;
- }
- }
- }
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719)
+ tp->pcie_readrq = 2048;
pcie_set_readrq(tp->pdev, tp->pcie_readrq);
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index f528243..73884b6 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -4,7 +4,7 @@
* Copyright (C) 2001, 2002, 2003, 2004 David S. Miller (davem@redhat.com)
* Copyright (C) 2001 Jeff Garzik (jgarzik@pobox.com)
* Copyright (C) 2004 Sun Microsystems Inc.
- * Copyright (C) 2007-2010 Broadcom Corporation.
+ * Copyright (C) 2007-2011 Broadcom Corporation.
*/
#ifndef _T3_H
@@ -141,6 +141,7 @@
#define CHIPREV_ID_57780_A1 0x57780001
#define CHIPREV_ID_5717_A0 0x05717000
#define CHIPREV_ID_57765_A0 0x57785000
+#define CHIPREV_ID_5719_A0 0x05719000
#define GET_ASIC_REV(CHIP_REV_ID) ((CHIP_REV_ID) >> 12)
#define ASIC_REV_5700 0x07
#define ASIC_REV_5701 0x00
@@ -1105,7 +1106,7 @@
#define TG3_CPMU_DBTMR1_PCIEXIT_2047US 0x07ff0000
#define TG3_CPMU_DBTMR1_LNKIDLE_2047US 0x000070ff
#define TG3_CPMU_EEE_DBTMR2 0x000036b8
-#define TG3_CPMU_DBTMR1_APE_TX_2047US 0x07ff0000
+#define TG3_CPMU_DBTMR2_APE_TX_2047US 0x07ff0000
#define TG3_CPMU_DBTMR2_TXIDXEQ_2047US 0x000070ff
#define TG3_CPMU_EEE_LNKIDL_CTRL 0x000036bc
#define TG3_CPMU_EEE_LNKIDL_PCIE_NL0 0x01000000
@@ -1333,6 +1334,10 @@
#define TG3_RDMA_RSRVCTRL_REG 0x00004900
#define TG3_RDMA_RSRVCTRL_FIFO_OFLW_FIX 0x00000004
+#define TG3_RDMA_RSRVCTRL_FIFO_LWM_1_5K 0x00000c00
+#define TG3_RDMA_RSRVCTRL_FIFO_LWM_MASK 0x00000ff0
+#define TG3_RDMA_RSRVCTRL_FIFO_HWM_1_5K 0x000c0000
+#define TG3_RDMA_RSRVCTRL_FIFO_HWM_MASK 0x000ff000
#define TG3_RDMA_RSRVCTRL_TXMRGN_320B 0x28000000
#define TG3_RDMA_RSRVCTRL_TXMRGN_MASK 0xffe00000
/* 0x4904 --> 0x4910 unused */
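
The new watermark definitions come as value/mask pairs, which suggests the usual read-modify-write against TG3_RDMA_RSRVCTRL_REG: clear the masks, then OR in the 1.5K encodings. The call site is not in this hunk, so this is a hedged standalone model of the bit manipulation only:

#include <stdint.h>
#include <stdio.h>

#define FIFO_LWM_1_5K  0x00000c00u  /* values copied from the hunk above */
#define FIFO_LWM_MASK  0x00000ff0u
#define FIFO_HWM_1_5K  0x000c0000u
#define FIFO_HWM_MASK  0x000ff000u

int main(void)
{
	uint32_t val = 0x000ffff0u;             /* pretend register readback */

	val &= ~(FIFO_LWM_MASK | FIFO_HWM_MASK);
	val |= FIFO_LWM_1_5K | FIFO_HWM_1_5K;
	printf("0x%08x\n", val);                /* watermarks now at 1.5K */
	return 0;
}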
@@ -2108,6 +2113,10 @@
#define MII_TG3_DSP_TAP1 0x0001
#define MII_TG3_DSP_TAP1_AGCTGT_DFLT 0x0007
+#define MII_TG3_DSP_TAP26 0x001a
+#define MII_TG3_DSP_TAP26_ALNOKO 0x0001
+#define MII_TG3_DSP_TAP26_RMRXSTO 0x0002
+#define MII_TG3_DSP_TAP26_OPCSINPT 0x0004
#define MII_TG3_DSP_AADJ1CH0 0x001f
#define MII_TG3_DSP_CH34TP2 0x4022
#define MII_TG3_DSP_CH34TP2_HIBW01 0x0010
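
The TAP26 bits are PHY DSP controls. In tg3 these are typically programmed through the driver's DSP write helper (select the tap via the DSP address register, then write the data port); assuming the tg3_phydsp_write() helper used elsewhere in the driver, the usage would look like:

/* hedged sketch -- tg3_phydsp_write(tp, reg, val) is assumed here, not
 * shown in this hunk */
tg3_phydsp_write(tp, MII_TG3_DSP_TAP26,
		 MII_TG3_DSP_TAP26_ALNOKO |
		 MII_TG3_DSP_TAP26_RMRXSTO |
		 MII_TG3_DSP_TAP26_OPCSINPT);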
diff --git a/drivers/net/tlan.c b/drivers/net/tlan.c
index f8e463c..0678e7e 100644
--- a/drivers/net/tlan.c
+++ b/drivers/net/tlan.c
@@ -63,45 +63,45 @@
* - Other minor stuff
*
* v1.4 Feb 10, 2000 - Updated with more changes required after Dave's
- * network cleanup in 2.3.43pre7 (Tigran & myself)
- * - Minor stuff.
+ * network cleanup in 2.3.43pre7 (Tigran & myself)
+ * - Minor stuff.
*
- * v1.5 March 22, 2000 - Fixed another timer bug that would hang the driver
- * if no cable/link were present.
+ * v1.5 March 22, 2000 - Fixed another timer bug that would hang the
+ * driver if no cable/link were present.
* - Cosmetic changes.
* - TODO: Port completely to new PCI/DMA API
- * Auto-Neg fallback.
+ * Auto-Neg fallback.
*
- * v1.6 April 04, 2000 - Fixed driver support for kernel-parameters. Haven't
- * tested it though, as the kernel support is currently
- * broken (2.3.99p4p3).
- * - Updated tlan.txt accordingly.
- * - Adjusted minimum/maximum frame length.
- * - There is now a TLAN website up at
- * http://hp.sourceforge.net/
+ * v1.6 April 04, 2000 - Fixed driver support for kernel-parameters.
+ * Haven't tested it though, as the kernel support
+ * is currently broken (2.3.99p4p3).
+ * - Updated tlan.txt accordingly.
+ * - Adjusted minimum/maximum frame length.
+ * - There is now a TLAN website up at
+ * http://hp.sourceforge.net/
*
- * v1.7 April 07, 2000 - Started to implement custom ioctls. Driver now
- * reports PHY information when used with Donald
- * Beckers userspace MII diagnostics utility.
+ * v1.7 April 07, 2000 - Started to implement custom ioctls. Driver now
+ * reports PHY information when used with Donald
+ * Beckers userspace MII diagnostics utility.
*
- * v1.8 April 23, 2000 - Fixed support for forced speed/duplex settings.
- * - Added link information to Auto-Neg and forced
- * modes. When NIC operates with auto-neg the driver
- * will report Link speed & duplex modes as well as
- * link partner abilities. When forced link is used,
- * the driver will report status of the established
- * link.
- * Please read tlan.txt for additional information.
- * - Removed call to check_region(), and used
- * return value of request_region() instead.
+ * v1.8 April 23, 2000 - Fixed support for forced speed/duplex settings.
+ * - Added link information to Auto-Neg and forced
+ * modes. When NIC operates with auto-neg the driver
+ * will report Link speed & duplex modes as well as
+ * link partner abilities. When forced link is used,
+ * the driver will report status of the established
+ * link.
+ * Please read tlan.txt for additional information.
+ * - Removed call to check_region(), and used
+ * return value of request_region() instead.
*
* v1.8a May 28, 2000 - Minor updates.
*
* v1.9 July 25, 2000 - Fixed a few remaining Full-Duplex issues.
- * - Updated with timer fixes from Andrew Morton.
- * - Fixed module race in TLan_Open.
- * - Added routine to monitor PHY status.
- * - Added activity led support for Proliant devices.
+ * - Updated with timer fixes from Andrew Morton.
+ * - Fixed module race in TLan_Open.
+ * - Added routine to monitor PHY status.
+ * - Added activity led support for Proliant devices.
*
* v1.10 Aug 30, 2000 - Added support for EISA based tlan controllers
* like the Compaq NetFlex3/E.
@@ -111,8 +111,8 @@
* hardware probe is done with kernel API and
* TLan_EisaProbe.
* - Adjusted debug information for probing.
- * - Fixed bug that would cause general debug information
- * to be printed after driver removal.
+ * - Fixed bug that would cause general debug
+ * information to be printed after driver removal.
* - Added transmit timeout handling.
* - Fixed OOM return values in tlan_probe.
* - Fixed possible mem leak in tlan_exit
@@ -136,8 +136,8 @@
*
* v1.12 Oct 12, 2000 - Minor fixes (memleak, init, etc.)
*
- * v1.13 Nov 28, 2000 - Stop flooding console with auto-neg issues
- * when link can't be established.
+ * v1.13 Nov 28, 2000 - Stop flooding console with auto-neg issues
+ * when link can't be established.
* - Added the bbuf option as a kernel parameter.
* - Fixed ioaddr probe bug.
* - Fixed stupid deadlock with MII interrupts.
@@ -147,28 +147,30 @@
* TLAN v1.0 silicon. This needs to be investigated
* further.
*
- * v1.14 Dec 16, 2000 - Added support for servicing multiple frames per.
- * interrupt. Thanks goes to
- * Adam Keys <adam@ti.com>
- * Denis Beaudoin <dbeaudoin@ti.com>
- * for providing the patch.
- * - Fixed auto-neg output when using multiple
- * adapters.
- * - Converted to use new taskq interface.
+ * v1.14 Dec 16, 2000 - Added support for servicing multiple frames per
+ * interrupt. Thanks goes to
+ * Adam Keys <adam@ti.com>
+ * Denis Beaudoin <dbeaudoin@ti.com>
+ * for providing the patch.
+ * - Fixed auto-neg output when using multiple
+ * adapters.
+ * - Converted to use new taskq interface.
*
- * v1.14a Jan 6, 2001 - Minor adjustments (spinlocks, etc.)
+ * v1.14a Jan 6, 2001 - Minor adjustments (spinlocks, etc.)
*
* Samuel Chessman <chessman@tux.org> New Maintainer!
*
* v1.15 Apr 4, 2002 - Correct operation when aui=1 to be
- * 10T half duplex no loopback
- * Thanks to Gunnar Eikman
+ * 10T half duplex no loopback
+ * Thanks to Gunnar Eikman
*
* Sakari Ailus <sakari.ailus@iki.fi>:
*
* v1.15a Dec 15 2008 - Remove bbuf support, it doesn't work anyway.
+ * v1.16 Jan 6 2011 - Make checkpatch.pl happy.
+ * v1.17 Jan 6 2011 - Add suspend/resume support.
*
- *******************************************************************************/
+ ******************************************************************************/
#include <linux/module.h>
#include <linux/init.h>
@@ -185,13 +187,11 @@
#include "tlan.h"
-typedef u32 (TLanIntVectorFunc)( struct net_device *, u16 );
-
/* For removing EISA devices */
-static struct net_device *TLan_Eisa_Devices;
+static struct net_device *tlan_eisa_devices;
-static int TLanDevicesInstalled;
+static int tlan_devices_installed;
/* Set speed, duplex and aui settings */
static int aui[MAX_TLAN_BOARDS];
@@ -202,7 +202,8 @@
module_param_array(duplex, int, NULL, 0);
module_param_array(speed, int, NULL, 0);
MODULE_PARM_DESC(aui, "ThunderLAN use AUI port(s) (0-1)");
-MODULE_PARM_DESC(duplex, "ThunderLAN duplex setting(s) (0-default, 1-half, 2-full)");
+MODULE_PARM_DESC(duplex,
+ "ThunderLAN duplex setting(s) (0-default, 1-half, 2-full)");
MODULE_PARM_DESC(speed, "ThunderLAN port speed setting(s) (0,10,100)");
MODULE_AUTHOR("Maintainer: Samuel Chessman <chessman@tux.org>");
@@ -218,139 +219,144 @@
module_param(debug, int, 0);
MODULE_PARM_DESC(debug, "ThunderLAN debug mask");
-static const char TLanSignature[] = "TLAN";
-static const char tlan_banner[] = "ThunderLAN driver v1.15a\n";
+static const char tlan_signature[] = "TLAN";
+static const char tlan_banner[] = "ThunderLAN driver v1.17\n";
static int tlan_have_pci;
static int tlan_have_eisa;
-static const char *media[] = {
- "10BaseT-HD ", "10BaseT-FD ","100baseTx-HD ",
- "100baseTx-FD", "100baseT4", NULL
+static const char * const media[] = {
+ "10BaseT-HD", "10BaseT-FD", "100baseTx-HD",
+ "100BaseTx-FD", "100BaseT4", NULL
};
static struct board {
- const char *deviceLabel;
- u32 flags;
- u16 addrOfs;
+ const char *device_label;
+ u32 flags;
+ u16 addr_ofs;
} board_info[] = {
{ "Compaq Netelligent 10 T PCI UTP", TLAN_ADAPTER_ACTIVITY_LED, 0x83 },
- { "Compaq Netelligent 10/100 TX PCI UTP", TLAN_ADAPTER_ACTIVITY_LED, 0x83 },
+ { "Compaq Netelligent 10/100 TX PCI UTP",
+ TLAN_ADAPTER_ACTIVITY_LED, 0x83 },
{ "Compaq Integrated NetFlex-3/P", TLAN_ADAPTER_NONE, 0x83 },
{ "Compaq NetFlex-3/P",
TLAN_ADAPTER_UNMANAGED_PHY | TLAN_ADAPTER_BIT_RATE_PHY, 0x83 },
{ "Compaq NetFlex-3/P", TLAN_ADAPTER_NONE, 0x83 },
{ "Compaq Netelligent Integrated 10/100 TX UTP",
TLAN_ADAPTER_ACTIVITY_LED, 0x83 },
- { "Compaq Netelligent Dual 10/100 TX PCI UTP", TLAN_ADAPTER_NONE, 0x83 },
- { "Compaq Netelligent 10/100 TX Embedded UTP", TLAN_ADAPTER_NONE, 0x83 },
+ { "Compaq Netelligent Dual 10/100 TX PCI UTP",
+ TLAN_ADAPTER_NONE, 0x83 },
+ { "Compaq Netelligent 10/100 TX Embedded UTP",
+ TLAN_ADAPTER_NONE, 0x83 },
{ "Olicom OC-2183/2185", TLAN_ADAPTER_USE_INTERN_10, 0x83 },
- { "Olicom OC-2325", TLAN_ADAPTER_UNMANAGED_PHY, 0xF8 },
- { "Olicom OC-2326", TLAN_ADAPTER_USE_INTERN_10, 0xF8 },
+ { "Olicom OC-2325", TLAN_ADAPTER_UNMANAGED_PHY, 0xf8 },
+ { "Olicom OC-2326", TLAN_ADAPTER_USE_INTERN_10, 0xf8 },
{ "Compaq Netelligent 10/100 TX UTP", TLAN_ADAPTER_ACTIVITY_LED, 0x83 },
- { "Compaq Netelligent 10 T/2 PCI UTP/Coax", TLAN_ADAPTER_NONE, 0x83 },
+ { "Compaq Netelligent 10 T/2 PCI UTP/coax", TLAN_ADAPTER_NONE, 0x83 },
{ "Compaq NetFlex-3/E",
- TLAN_ADAPTER_ACTIVITY_LED | /* EISA card */
+ TLAN_ADAPTER_ACTIVITY_LED | /* EISA card */
TLAN_ADAPTER_UNMANAGED_PHY | TLAN_ADAPTER_BIT_RATE_PHY, 0x83 },
- { "Compaq NetFlex-3/E", TLAN_ADAPTER_ACTIVITY_LED, 0x83 }, /* EISA card */
+ { "Compaq NetFlex-3/E",
+ TLAN_ADAPTER_ACTIVITY_LED, 0x83 }, /* EISA card */
};
static DEFINE_PCI_DEVICE_TABLE(tlan_pci_tbl) = {
{ PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_NETEL10,
- PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
{ PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_NETEL100,
- PCI_ANY_ID, PCI_ANY_ID, 0, 0, 1 },
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 1 },
{ PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_NETFLEX3I,
- PCI_ANY_ID, PCI_ANY_ID, 0, 0, 2 },
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 2 },
{ PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_THUNDER,
- PCI_ANY_ID, PCI_ANY_ID, 0, 0, 3 },
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 3 },
{ PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_NETFLEX3B,
- PCI_ANY_ID, PCI_ANY_ID, 0, 0, 4 },
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 4 },
{ PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_NETEL100PI,
- PCI_ANY_ID, PCI_ANY_ID, 0, 0, 5 },
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 5 },
{ PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_NETEL100D,
- PCI_ANY_ID, PCI_ANY_ID, 0, 0, 6 },
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 6 },
{ PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_NETEL100I,
- PCI_ANY_ID, PCI_ANY_ID, 0, 0, 7 },
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 7 },
{ PCI_VENDOR_ID_OLICOM, PCI_DEVICE_ID_OLICOM_OC2183,
- PCI_ANY_ID, PCI_ANY_ID, 0, 0, 8 },
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 8 },
{ PCI_VENDOR_ID_OLICOM, PCI_DEVICE_ID_OLICOM_OC2325,
- PCI_ANY_ID, PCI_ANY_ID, 0, 0, 9 },
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 9 },
{ PCI_VENDOR_ID_OLICOM, PCI_DEVICE_ID_OLICOM_OC2326,
- PCI_ANY_ID, PCI_ANY_ID, 0, 0, 10 },
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 10 },
{ PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_NETELLIGENT_10_100_WS_5100,
- PCI_ANY_ID, PCI_ANY_ID, 0, 0, 11 },
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 11 },
{ PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_NETELLIGENT_10_T2,
- PCI_ANY_ID, PCI_ANY_ID, 0, 0, 12 },
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 12 },
{ 0,}
};
MODULE_DEVICE_TABLE(pci, tlan_pci_tbl);
-static void TLan_EisaProbe( void );
-static void TLan_Eisa_Cleanup( void );
-static int TLan_Init( struct net_device * );
-static int TLan_Open( struct net_device *dev );
-static netdev_tx_t TLan_StartTx( struct sk_buff *, struct net_device *);
-static irqreturn_t TLan_HandleInterrupt( int, void *);
-static int TLan_Close( struct net_device *);
-static struct net_device_stats *TLan_GetStats( struct net_device *);
-static void TLan_SetMulticastList( struct net_device *);
-static int TLan_ioctl( struct net_device *dev, struct ifreq *rq, int cmd);
-static int TLan_probe1( struct pci_dev *pdev, long ioaddr,
- int irq, int rev, const struct pci_device_id *ent);
-static void TLan_tx_timeout( struct net_device *dev);
-static void TLan_tx_timeout_work(struct work_struct *work);
-static int tlan_init_one( struct pci_dev *pdev, const struct pci_device_id *ent);
+static void tlan_eisa_probe(void);
+static void tlan_eisa_cleanup(void);
+static int tlan_init(struct net_device *);
+static int tlan_open(struct net_device *dev);
+static netdev_tx_t tlan_start_tx(struct sk_buff *, struct net_device *);
+static irqreturn_t tlan_handle_interrupt(int, void *);
+static int tlan_close(struct net_device *);
+static struct net_device_stats *tlan_get_stats(struct net_device *);
+static void tlan_set_multicast_list(struct net_device *);
+static int tlan_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+static int tlan_probe1(struct pci_dev *pdev, long ioaddr,
+ int irq, int rev, const struct pci_device_id *ent);
+static void tlan_tx_timeout(struct net_device *dev);
+static void tlan_tx_timeout_work(struct work_struct *work);
+static int tlan_init_one(struct pci_dev *pdev,
+ const struct pci_device_id *ent);
-static u32 TLan_HandleTxEOF( struct net_device *, u16 );
-static u32 TLan_HandleStatOverflow( struct net_device *, u16 );
-static u32 TLan_HandleRxEOF( struct net_device *, u16 );
-static u32 TLan_HandleDummy( struct net_device *, u16 );
-static u32 TLan_HandleTxEOC( struct net_device *, u16 );
-static u32 TLan_HandleStatusCheck( struct net_device *, u16 );
-static u32 TLan_HandleRxEOC( struct net_device *, u16 );
+static u32 tlan_handle_tx_eof(struct net_device *, u16);
+static u32 tlan_handle_stat_overflow(struct net_device *, u16);
+static u32 tlan_handle_rx_eof(struct net_device *, u16);
+static u32 tlan_handle_dummy(struct net_device *, u16);
+static u32 tlan_handle_tx_eoc(struct net_device *, u16);
+static u32 tlan_handle_status_check(struct net_device *, u16);
+static u32 tlan_handle_rx_eoc(struct net_device *, u16);
-static void TLan_Timer( unsigned long );
+static void tlan_timer(unsigned long);
-static void TLan_ResetLists( struct net_device * );
-static void TLan_FreeLists( struct net_device * );
-static void TLan_PrintDio( u16 );
-static void TLan_PrintList( TLanList *, char *, int );
-static void TLan_ReadAndClearStats( struct net_device *, int );
-static void TLan_ResetAdapter( struct net_device * );
-static void TLan_FinishReset( struct net_device * );
-static void TLan_SetMac( struct net_device *, int areg, char *mac );
+static void tlan_reset_lists(struct net_device *);
+static void tlan_free_lists(struct net_device *);
+static void tlan_print_dio(u16);
+static void tlan_print_list(struct tlan_list *, char *, int);
+static void tlan_read_and_clear_stats(struct net_device *, int);
+static void tlan_reset_adapter(struct net_device *);
+static void tlan_finish_reset(struct net_device *);
+static void tlan_set_mac(struct net_device *, int areg, char *mac);
-static void TLan_PhyPrint( struct net_device * );
-static void TLan_PhyDetect( struct net_device * );
-static void TLan_PhyPowerDown( struct net_device * );
-static void TLan_PhyPowerUp( struct net_device * );
-static void TLan_PhyReset( struct net_device * );
-static void TLan_PhyStartLink( struct net_device * );
-static void TLan_PhyFinishAutoNeg( struct net_device * );
+static void tlan_phy_print(struct net_device *);
+static void tlan_phy_detect(struct net_device *);
+static void tlan_phy_power_down(struct net_device *);
+static void tlan_phy_power_up(struct net_device *);
+static void tlan_phy_reset(struct net_device *);
+static void tlan_phy_start_link(struct net_device *);
+static void tlan_phy_finish_auto_neg(struct net_device *);
#ifdef MONITOR
-static void TLan_PhyMonitor( struct net_device * );
+static void tlan_phy_monitor(struct net_device *);
#endif
/*
-static int TLan_PhyNop( struct net_device * );
-static int TLan_PhyInternalCheck( struct net_device * );
-static int TLan_PhyInternalService( struct net_device * );
-static int TLan_PhyDp83840aCheck( struct net_device * );
+ static int tlan_phy_nop(struct net_device *);
+ static int tlan_phy_internal_check(struct net_device *);
+ static int tlan_phy_internal_service(struct net_device *);
+ static int tlan_phy_dp83840a_check(struct net_device *);
*/
-static bool TLan_MiiReadReg( struct net_device *, u16, u16, u16 * );
-static void TLan_MiiSendData( u16, u32, unsigned );
-static void TLan_MiiSync( u16 );
-static void TLan_MiiWriteReg( struct net_device *, u16, u16, u16 );
+static bool tlan_mii_read_reg(struct net_device *, u16, u16, u16 *);
+static void tlan_mii_send_data(u16, u32, unsigned);
+static void tlan_mii_sync(u16);
+static void tlan_mii_write_reg(struct net_device *, u16, u16, u16);
-static void TLan_EeSendStart( u16 );
-static int TLan_EeSendByte( u16, u8, int );
-static void TLan_EeReceiveByte( u16, u8 *, int );
-static int TLan_EeReadByte( struct net_device *, u8, u8 * );
+static void tlan_ee_send_start(u16);
+static int tlan_ee_send_byte(u16, u8, int);
+static void tlan_ee_receive_byte(u16, u8 *, int);
+static int tlan_ee_read_byte(struct net_device *, u8, u8 *);
static inline void
-TLan_StoreSKB( struct tlan_list_tag *tag, struct sk_buff *skb)
+tlan_store_skb(struct tlan_list *tag, struct sk_buff *skb)
{
unsigned long addr = (unsigned long)skb;
tag->buffer[9].address = addr;
@@ -358,7 +364,7 @@
}
static inline struct sk_buff *
-TLan_GetSKB( const struct tlan_list_tag *tag)
+tlan_get_skb(const struct tlan_list *tag)
{
unsigned long addr;
@@ -367,50 +373,50 @@
return (struct sk_buff *) addr;
}
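
tlan_store_skb()/tlan_get_skb() stash the sk_buff pointer in spare 32-bit descriptor slots so it can be recovered at completion time without a separate shadow array. A standalone model of the round trip (slot 9 matches the hunk; whether slot 8 carries the high half on 64-bit is an assumption here):

#include <stdint.h>
#include <stdio.h>

struct tag { uint32_t addr[10]; };   /* stand-in for struct tlan_list */

static void store_ptr(struct tag *t, void *p)
{
	uint64_t a = (uintptr_t)p;

	t->addr[9] = (uint32_t)a;         /* low half */
	t->addr[8] = (uint32_t)(a >> 32); /* high half, assumed slot */
}

static void *get_ptr(const struct tag *t)
{
	uint64_t a = ((uint64_t)t->addr[8] << 32) | t->addr[9];

	return (void *)(uintptr_t)a;
}

int main(void)
{
	struct tag t;
	int skb;                          /* pretend packet */

	store_ptr(&t, &skb);
	printf("round trip ok: %d\n", get_ptr(&t) == (void *)&skb);
	return 0;
}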
-
-static TLanIntVectorFunc *TLanIntVector[TLAN_INT_NUMBER_OF_INTS] = {
+static u32
+(*tlan_int_vector[TLAN_INT_NUMBER_OF_INTS])(struct net_device *, u16) = {
NULL,
- TLan_HandleTxEOF,
- TLan_HandleStatOverflow,
- TLan_HandleRxEOF,
- TLan_HandleDummy,
- TLan_HandleTxEOC,
- TLan_HandleStatusCheck,
- TLan_HandleRxEOC
+ tlan_handle_tx_eof,
+ tlan_handle_stat_overflow,
+ tlan_handle_rx_eof,
+ tlan_handle_dummy,
+ tlan_handle_tx_eoc,
+ tlan_handle_status_check,
+ tlan_handle_rx_eoc
};
static inline void
-TLan_SetTimer( struct net_device *dev, u32 ticks, u32 type )
+tlan_set_timer(struct net_device *dev, u32 ticks, u32 type)
{
- TLanPrivateInfo *priv = netdev_priv(dev);
+ struct tlan_priv *priv = netdev_priv(dev);
unsigned long flags = 0;
if (!in_irq())
spin_lock_irqsave(&priv->lock, flags);
- if ( priv->timer.function != NULL &&
- priv->timerType != TLAN_TIMER_ACTIVITY ) {
+ if (priv->timer.function != NULL &&
+ priv->timer_type != TLAN_TIMER_ACTIVITY) {
if (!in_irq())
spin_unlock_irqrestore(&priv->lock, flags);
return;
}
- priv->timer.function = TLan_Timer;
+ priv->timer.function = tlan_timer;
if (!in_irq())
spin_unlock_irqrestore(&priv->lock, flags);
priv->timer.data = (unsigned long) dev;
- priv->timerSetAt = jiffies;
- priv->timerType = type;
+ priv->timer_set_at = jiffies;
+ priv->timer_type = type;
mod_timer(&priv->timer, jiffies + ticks);
-} /* TLan_SetTimer */
+}
/*****************************************************************************
******************************************************************************
- ThunderLAN Driver Primary Functions
+ThunderLAN driver primary functions
- These functions are more or less common to all Linux network drivers.
+these functions are more or less common to all linux network drivers.
******************************************************************************
*****************************************************************************/
@@ -419,49 +425,117 @@
- /***************************************************************
- * tlan_remove_one
- *
- * Returns:
- * Nothing
- * Parms:
- * None
- *
- * Goes through the TLanDevices list and frees the device
- * structs and memory associated with each device (lists
- * and buffers). It also ureserves the IO port regions
- * associated with this device.
- *
- **************************************************************/
+/***************************************************************
+ * tlan_remove_one
+ *
+ * Returns:
+ * Nothing
+ * Parms:
+ * None
+ *
+ * Goes through the TLanDevices list and frees the device
+ * structs and memory associated with each device (lists
+ * and buffers). It also unreserves the IO port regions
+ * associated with this device.
+ *
+ **************************************************************/
-static void __devexit tlan_remove_one( struct pci_dev *pdev)
+static void __devexit tlan_remove_one(struct pci_dev *pdev)
{
- struct net_device *dev = pci_get_drvdata( pdev );
- TLanPrivateInfo *priv = netdev_priv(dev);
+ struct net_device *dev = pci_get_drvdata(pdev);
+ struct tlan_priv *priv = netdev_priv(dev);
- unregister_netdev( dev );
+ unregister_netdev(dev);
- if ( priv->dmaStorage ) {
- pci_free_consistent(priv->pciDev,
- priv->dmaSize, priv->dmaStorage,
- priv->dmaStorageDMA );
+ if (priv->dma_storage) {
+ pci_free_consistent(priv->pci_dev,
+ priv->dma_size, priv->dma_storage,
+ priv->dma_storage_dma);
}
#ifdef CONFIG_PCI
pci_release_regions(pdev);
#endif
- free_netdev( dev );
+ free_netdev(dev);
- pci_set_drvdata( pdev, NULL );
+ pci_set_drvdata(pdev, NULL);
}
+static void tlan_start(struct net_device *dev)
+{
+ tlan_reset_lists(dev);
+ /* NOTE: It might not be necessary to read the stats before a
+ reset if you don't care what the values are.
+ */
+ tlan_read_and_clear_stats(dev, TLAN_IGNORE);
+ tlan_reset_adapter(dev);
+ netif_wake_queue(dev);
+}
+
+static void tlan_stop(struct net_device *dev)
+{
+ struct tlan_priv *priv = netdev_priv(dev);
+
+ tlan_read_and_clear_stats(dev, TLAN_RECORD);
+ outl(TLAN_HC_AD_RST, dev->base_addr + TLAN_HOST_CMD);
+ /* Reset and power down phy */
+ tlan_reset_adapter(dev);
+ if (priv->timer.function != NULL) {
+ del_timer_sync(&priv->timer);
+ priv->timer.function = NULL;
+ }
+}
+
+#ifdef CONFIG_PM
+
+static int tlan_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+ struct net_device *dev = pci_get_drvdata(pdev);
+
+ if (netif_running(dev))
+ tlan_stop(dev);
+
+ netif_device_detach(dev);
+ pci_save_state(pdev);
+ pci_disable_device(pdev);
+ pci_wake_from_d3(pdev, false);
+ pci_set_power_state(pdev, PCI_D3hot);
+
+ return 0;
+}
+
+static int tlan_resume(struct pci_dev *pdev)
+{
+ struct net_device *dev = pci_get_drvdata(pdev);
+
+ pci_set_power_state(pdev, PCI_D0);
+ pci_restore_state(pdev);
+ pci_enable_wake(pdev, 0, 0);
+ netif_device_attach(dev);
+
+ if (netif_running(dev))
+ tlan_start(dev);
+
+ return 0;
+}
+
+#else /* CONFIG_PM */
+
+#define tlan_suspend NULL
+#define tlan_resume NULL
+
+#endif /* CONFIG_PM */
+
+
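When CONFIG_PM is off, the suspend/resume symbols degrade to NULL so the pci_driver table below stays free of #ifdefs. A minimal standalone model of the stub pattern:

#include <stdio.h>

/* #define CONFIG_PM 1 */  /* toggle to model the kernel config option */

#ifdef CONFIG_PM
static int demo_suspend(void *dev) { (void)dev; return 0; }
#else
#define demo_suspend NULL
#endif

struct demo_driver { int (*suspend)(void *dev); };

static struct demo_driver drv = { .suspend = demo_suspend };

int main(void)
{
	printf("suspend hook set: %d\n", drv.suspend != NULL);
	return 0;
}
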
static struct pci_driver tlan_driver = {
.name = "tlan",
.id_table = tlan_pci_tbl,
.probe = tlan_init_one,
.remove = __devexit_p(tlan_remove_one),
+ .suspend = tlan_suspend,
+ .resume = tlan_resume,
};
static int __init tlan_probe(void)
@@ -482,13 +556,13 @@
}
TLAN_DBG(TLAN_DEBUG_PROBE, "Starting EISA Probe....\n");
- TLan_EisaProbe();
+ tlan_eisa_probe();
printk(KERN_INFO "TLAN: %d device%s installed, PCI: %d EISA: %d\n",
- TLanDevicesInstalled, TLanDevicesInstalled == 1 ? "" : "s",
- tlan_have_pci, tlan_have_eisa);
+ tlan_devices_installed, tlan_devices_installed == 1 ? "" : "s",
+ tlan_have_pci, tlan_have_eisa);
- if (TLanDevicesInstalled == 0) {
+ if (tlan_devices_installed == 0) {
rc = -ENODEV;
goto err_out_pci_unreg;
}
@@ -501,39 +575,39 @@
}
-static int __devinit tlan_init_one( struct pci_dev *pdev,
- const struct pci_device_id *ent)
+static int __devinit tlan_init_one(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
{
- return TLan_probe1( pdev, -1, -1, 0, ent);
+ return tlan_probe1(pdev, -1, -1, 0, ent);
}
/*
- ***************************************************************
- * tlan_probe1
- *
- * Returns:
- * 0 on success, error code on error
- * Parms:
- * none
- *
- * The name is lower case to fit in with all the rest of
- * the netcard_probe names. This function looks for
- * another TLan based adapter, setting it up with the
- * allocated device struct if one is found.
- * tlan_probe has been ported to the new net API and
- * now allocates its own device structure. This function
- * is also used by modules.
- *
- **************************************************************/
+***************************************************************
+* tlan_probe1
+*
+* Returns:
+* 0 on success, error code on error
+* Parms:
+* none
+*
+* The name is lower case to fit in with all the rest of
+* the netcard_probe names. This function looks for
+* another TLan based adapter, setting it up with the
+* allocated device struct if one is found.
+* tlan_probe has been ported to the new net API and
+* now allocates its own device structure. This function
+* is also used by modules.
+*
+**************************************************************/
-static int __devinit TLan_probe1(struct pci_dev *pdev,
+static int __devinit tlan_probe1(struct pci_dev *pdev,
long ioaddr, int irq, int rev,
- const struct pci_device_id *ent )
+ const struct pci_device_id *ent)
{
struct net_device *dev;
- TLanPrivateInfo *priv;
+ struct tlan_priv *priv;
u16 device_id;
int reg, rc = -ENODEV;
@@ -543,7 +617,7 @@
if (rc)
return rc;
- rc = pci_request_regions(pdev, TLanSignature);
+ rc = pci_request_regions(pdev, tlan_signature);
if (rc) {
printk(KERN_ERR "TLAN: Could not reserve IO regions\n");
goto err_out;
@@ -551,7 +625,7 @@
}
#endif /* CONFIG_PCI */
- dev = alloc_etherdev(sizeof(TLanPrivateInfo));
+ dev = alloc_etherdev(sizeof(struct tlan_priv));
if (dev == NULL) {
printk(KERN_ERR "TLAN: Could not allocate memory for device.\n");
rc = -ENOMEM;
@@ -561,26 +635,28 @@
priv = netdev_priv(dev);
- priv->pciDev = pdev;
+ priv->pci_dev = pdev;
priv->dev = dev;
/* Is this a PCI device? */
if (pdev) {
- u32 pci_io_base = 0;
+ u32 pci_io_base = 0;
priv->adapter = &board_info[ent->driver_data];
rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
if (rc) {
- printk(KERN_ERR "TLAN: No suitable PCI mapping available.\n");
+ printk(KERN_ERR
+ "TLAN: No suitable PCI mapping available.\n");
goto err_out_free_dev;
}
- for ( reg= 0; reg <= 5; reg ++ ) {
+ for (reg = 0; reg <= 5; reg++) {
if (pci_resource_flags(pdev, reg) & IORESOURCE_IO) {
pci_io_base = pci_resource_start(pdev, reg);
- TLAN_DBG( TLAN_DEBUG_GNRL, "IO mapping is available at %x.\n",
- pci_io_base);
+ TLAN_DBG(TLAN_DEBUG_GNRL,
+ "IO mapping is available at %x.\n",
+ pci_io_base);
break;
}
}
@@ -592,7 +668,7 @@
dev->base_addr = pci_io_base;
dev->irq = pdev->irq;
- priv->adapterRev = pdev->revision;
+ priv->adapter_rev = pdev->revision;
pci_set_master(pdev);
pci_set_drvdata(pdev, dev);
@@ -602,11 +678,11 @@
device_id = inw(ioaddr + EISA_ID2);
priv->is_eisa = 1;
if (device_id == 0x20F1) {
- priv->adapter = &board_info[13]; /* NetFlex-3/E */
- priv->adapterRev = 23; /* TLAN 2.3 */
+ priv->adapter = &board_info[13]; /* NetFlex-3/E */
+ priv->adapter_rev = 23; /* TLAN 2.3 */
} else {
priv->adapter = &board_info[14];
- priv->adapterRev = 10; /* TLAN 1.0 */
+ priv->adapter_rev = 10; /* TLAN 1.0 */
}
dev->base_addr = ioaddr;
dev->irq = irq;
@@ -620,11 +696,11 @@
priv->speed = ((dev->mem_start & 0x18) == 0x18) ? 0
: (dev->mem_start & 0x18) >> 3;
- if (priv->speed == 0x1) {
+ if (priv->speed == 0x1)
priv->speed = TLAN_SPEED_10;
- } else if (priv->speed == 0x2) {
+ else if (priv->speed == 0x2)
priv->speed = TLAN_SPEED_100;
- }
+
debug = priv->debug = dev->mem_end;
} else {
priv->aui = aui[boards_found];
@@ -635,11 +711,11 @@
/* This will be used when we get an adapter error from
* within our irq handler */
- INIT_WORK(&priv->tlan_tqueue, TLan_tx_timeout_work);
+ INIT_WORK(&priv->tlan_tqueue, tlan_tx_timeout_work);
spin_lock_init(&priv->lock);
- rc = TLan_Init(dev);
+ rc = tlan_init(dev);
if (rc) {
printk(KERN_ERR "TLAN: Could not set up device.\n");
goto err_out_free_dev;
@@ -652,29 +728,29 @@
}
- TLanDevicesInstalled++;
+ tlan_devices_installed++;
boards_found++;
/* pdev is NULL if this is an EISA device */
if (pdev)
tlan_have_pci++;
else {
- priv->nextDevice = TLan_Eisa_Devices;
- TLan_Eisa_Devices = dev;
+ priv->next_device = tlan_eisa_devices;
+ tlan_eisa_devices = dev;
tlan_have_eisa++;
}
printk(KERN_INFO "TLAN: %s irq=%2d, io=%04x, %s, Rev. %d\n",
- dev->name,
- (int) dev->irq,
- (int) dev->base_addr,
- priv->adapter->deviceLabel,
- priv->adapterRev);
+ dev->name,
+ (int) dev->irq,
+ (int) dev->base_addr,
+ priv->adapter->device_label,
+ priv->adapter_rev);
return 0;
err_out_uninit:
- pci_free_consistent(priv->pciDev, priv->dmaSize, priv->dmaStorage,
- priv->dmaStorageDMA );
+ pci_free_consistent(priv->pci_dev, priv->dma_size, priv->dma_storage,
+ priv->dma_storage_dma);
err_out_free_dev:
free_netdev(dev);
err_out_regions:
@@ -689,22 +765,23 @@
}
-static void TLan_Eisa_Cleanup(void)
+static void tlan_eisa_cleanup(void)
{
struct net_device *dev;
- TLanPrivateInfo *priv;
+ struct tlan_priv *priv;
- while( tlan_have_eisa ) {
- dev = TLan_Eisa_Devices;
+ while (tlan_have_eisa) {
+ dev = tlan_eisa_devices;
priv = netdev_priv(dev);
- if (priv->dmaStorage) {
- pci_free_consistent(priv->pciDev, priv->dmaSize,
- priv->dmaStorage, priv->dmaStorageDMA );
+ if (priv->dma_storage) {
+ pci_free_consistent(priv->pci_dev, priv->dma_size,
+ priv->dma_storage,
+ priv->dma_storage_dma);
}
- release_region( dev->base_addr, 0x10);
- unregister_netdev( dev );
- TLan_Eisa_Devices = priv->nextDevice;
- free_netdev( dev );
+ release_region(dev->base_addr, 0x10);
+ unregister_netdev(dev);
+ tlan_eisa_devices = priv->next_device;
+ free_netdev(dev);
tlan_have_eisa--;
}
}
@@ -715,7 +792,7 @@
pci_unregister_driver(&tlan_driver);
if (tlan_have_eisa)
- TLan_Eisa_Cleanup();
+ tlan_eisa_cleanup();
}
@@ -726,24 +803,24 @@
- /**************************************************************
- * TLan_EisaProbe
- *
- * Returns: 0 on success, 1 otherwise
- *
- * Parms: None
- *
- *
- * This functions probes for EISA devices and calls
- * TLan_probe1 when one is found.
- *
- *************************************************************/
+/**************************************************************
+ * tlan_eisa_probe
+ *
+ * Returns: 0 on success, 1 otherwise
+ *
+ * Parms: None
+ *
+ *
+ * This function probes for EISA devices and calls
+ * tlan_probe1 when one is found.
+ *
+ *************************************************************/
-static void __init TLan_EisaProbe (void)
+static void __init tlan_eisa_probe(void)
{
- long ioaddr;
- int rc = -ENODEV;
- int irq;
+ long ioaddr;
+ int rc = -ENODEV;
+ int irq;
u16 device_id;
if (!EISA_bus) {
@@ -754,15 +831,16 @@
/* Loop through all slots of the EISA bus */
for (ioaddr = 0x1000; ioaddr < 0x9000; ioaddr += 0x1000) {
- TLAN_DBG(TLAN_DEBUG_PROBE,"EISA_ID 0x%4x: 0x%4x\n",
- (int) ioaddr + 0xC80, inw(ioaddr + EISA_ID));
- TLAN_DBG(TLAN_DEBUG_PROBE,"EISA_ID 0x%4x: 0x%4x\n",
- (int) ioaddr + 0xC82, inw(ioaddr + EISA_ID2));
+ TLAN_DBG(TLAN_DEBUG_PROBE, "EISA_ID 0x%4x: 0x%4x\n",
+ (int) ioaddr + 0xc80, inw(ioaddr + EISA_ID));
+ TLAN_DBG(TLAN_DEBUG_PROBE, "EISA_ID 0x%4x: 0x%4x\n",
+ (int) ioaddr + 0xc82, inw(ioaddr + EISA_ID2));
- TLAN_DBG(TLAN_DEBUG_PROBE, "Probing for EISA adapter at IO: 0x%4x : ",
- (int) ioaddr);
- if (request_region(ioaddr, 0x10, TLanSignature) == NULL)
+ TLAN_DBG(TLAN_DEBUG_PROBE,
+ "Probing for EISA adapter at IO: 0x%4x : ",
+ (int) ioaddr);
+ if (request_region(ioaddr, 0x10, tlan_signature) == NULL)
goto out;
if (inw(ioaddr + EISA_ID) != 0x110E) {
@@ -772,326 +850,326 @@
device_id = inw(ioaddr + EISA_ID2);
if (device_id != 0x20F1 && device_id != 0x40F1) {
- release_region (ioaddr, 0x10);
+ release_region(ioaddr, 0x10);
goto out;
}
- if (inb(ioaddr + EISA_CR) != 0x1) { /* Check if adapter is enabled */
- release_region (ioaddr, 0x10);
+ /* check if adapter is enabled */
+ if (inb(ioaddr + EISA_CR) != 0x1) {
+ release_region(ioaddr, 0x10);
goto out2;
}
if (debug == 0x10)
- printk("Found one\n");
+ printk(KERN_INFO "Found one\n");
/* Get irq from board */
- switch (inb(ioaddr + 0xCC0)) {
- case(0x10):
- irq=5;
- break;
- case(0x20):
- irq=9;
- break;
- case(0x40):
- irq=10;
- break;
- case(0x80):
- irq=11;
- break;
- default:
- goto out;
+ switch (inb(ioaddr + 0xcc0)) {
+ case(0x10):
+ irq = 5;
+ break;
+ case(0x20):
+ irq = 9;
+ break;
+ case(0x40):
+ irq = 10;
+ break;
+ case(0x80):
+ irq = 11;
+ break;
+ default:
+ goto out;
}
/* Setup the newly found eisa adapter */
- rc = TLan_probe1( NULL, ioaddr, irq,
- 12, NULL);
+ rc = tlan_probe1(NULL, ioaddr, irq,
+ 12, NULL);
continue;
- out:
- if (debug == 0x10)
- printk("None found\n");
- continue;
+out:
+ if (debug == 0x10)
+ printk(KERN_INFO "None found\n");
+ continue;
- out2: if (debug == 0x10)
- printk("Card found but it is not enabled, skipping\n");
- continue;
+out2:
+ if (debug == 0x10)
+ printk(KERN_INFO "Card found but it is not enabled, skipping\n");
+ continue;
}
-} /* TLan_EisaProbe */
+}
#ifdef CONFIG_NET_POLL_CONTROLLER
-static void TLan_Poll(struct net_device *dev)
+static void tlan_poll(struct net_device *dev)
{
disable_irq(dev->irq);
- TLan_HandleInterrupt(dev->irq, dev);
+ tlan_handle_interrupt(dev->irq, dev);
enable_irq(dev->irq);
}
#endif
-static const struct net_device_ops TLan_netdev_ops = {
- .ndo_open = TLan_Open,
- .ndo_stop = TLan_Close,
- .ndo_start_xmit = TLan_StartTx,
- .ndo_tx_timeout = TLan_tx_timeout,
- .ndo_get_stats = TLan_GetStats,
- .ndo_set_multicast_list = TLan_SetMulticastList,
- .ndo_do_ioctl = TLan_ioctl,
+static const struct net_device_ops tlan_netdev_ops = {
+ .ndo_open = tlan_open,
+ .ndo_stop = tlan_close,
+ .ndo_start_xmit = tlan_start_tx,
+ .ndo_tx_timeout = tlan_tx_timeout,
+ .ndo_get_stats = tlan_get_stats,
+ .ndo_set_multicast_list = tlan_set_multicast_list,
+ .ndo_do_ioctl = tlan_ioctl,
.ndo_change_mtu = eth_change_mtu,
- .ndo_set_mac_address = eth_mac_addr,
+ .ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
#ifdef CONFIG_NET_POLL_CONTROLLER
- .ndo_poll_controller = TLan_Poll,
+ .ndo_poll_controller = tlan_poll,
#endif
};
- /***************************************************************
- * TLan_Init
- *
- * Returns:
- * 0 on success, error code otherwise.
- * Parms:
- * dev The structure of the device to be
- * init'ed.
- *
- * This function completes the initialization of the
- * device structure and driver. It reserves the IO
- * addresses, allocates memory for the lists and bounce
- * buffers, retrieves the MAC address from the eeprom
- * and assignes the device's methods.
- *
- **************************************************************/
+/***************************************************************
+ * tlan_init
+ *
+ * Returns:
+ * 0 on success, error code otherwise.
+ * Parms:
+ * dev The structure of the device to be
+ * init'ed.
+ *
+ * This function completes the initialization of the
+ * device structure and driver. It reserves the IO
+ * addresses, allocates memory for the lists and bounce
+ * buffers, retrieves the MAC address from the eeprom
+ * and assigns the device's methods.
+ *
+ **************************************************************/
-static int TLan_Init( struct net_device *dev )
+static int tlan_init(struct net_device *dev)
{
int dma_size;
- int err;
+ int err;
int i;
- TLanPrivateInfo *priv;
+ struct tlan_priv *priv;
priv = netdev_priv(dev);
- dma_size = ( TLAN_NUM_RX_LISTS + TLAN_NUM_TX_LISTS )
- * ( sizeof(TLanList) );
- priv->dmaStorage = pci_alloc_consistent(priv->pciDev,
- dma_size, &priv->dmaStorageDMA);
- priv->dmaSize = dma_size;
+ dma_size = (TLAN_NUM_RX_LISTS + TLAN_NUM_TX_LISTS)
+ * (sizeof(struct tlan_list));
+ priv->dma_storage = pci_alloc_consistent(priv->pci_dev,
+ dma_size,
+ &priv->dma_storage_dma);
+ priv->dma_size = dma_size;
- if ( priv->dmaStorage == NULL ) {
- printk(KERN_ERR "TLAN: Could not allocate lists and buffers for %s.\n",
- dev->name );
+ if (priv->dma_storage == NULL) {
+ printk(KERN_ERR
+ "TLAN: Could not allocate lists and buffers for %s.\n",
+ dev->name);
return -ENOMEM;
}
- memset( priv->dmaStorage, 0, dma_size );
- priv->rxList = (TLanList *) ALIGN((unsigned long)priv->dmaStorage, 8);
- priv->rxListDMA = ALIGN(priv->dmaStorageDMA, 8);
- priv->txList = priv->rxList + TLAN_NUM_RX_LISTS;
- priv->txListDMA = priv->rxListDMA + sizeof(TLanList) * TLAN_NUM_RX_LISTS;
+ memset(priv->dma_storage, 0, dma_size);
+ priv->rx_list = (struct tlan_list *)
+ ALIGN((unsigned long)priv->dma_storage, 8);
+ priv->rx_list_dma = ALIGN(priv->dma_storage_dma, 8);
+ priv->tx_list = priv->rx_list + TLAN_NUM_RX_LISTS;
+ priv->tx_list_dma =
+ priv->rx_list_dma + sizeof(struct tlan_list)*TLAN_NUM_RX_LISTS;
err = 0;
- for ( i = 0; i < 6 ; i++ )
- err |= TLan_EeReadByte( dev,
- (u8) priv->adapter->addrOfs + i,
- (u8 *) &dev->dev_addr[i] );
- if ( err ) {
+ for (i = 0; i < 6 ; i++)
+ err |= tlan_ee_read_byte(dev,
+ (u8) priv->adapter->addr_ofs + i,
+ (u8 *) &dev->dev_addr[i]);
+ if (err) {
printk(KERN_ERR "TLAN: %s: Error reading MAC from eeprom: %d\n",
- dev->name,
- err );
+ dev->name,
+ err);
}
dev->addr_len = 6;
netif_carrier_off(dev);
/* Device methods */
- dev->netdev_ops = &TLan_netdev_ops;
+ dev->netdev_ops = &tlan_netdev_ops;
dev->watchdog_timeo = TX_TIMEOUT;
return 0;
-} /* TLan_Init */
+}
- /***************************************************************
- * TLan_Open
- *
- * Returns:
- * 0 on success, error code otherwise.
- * Parms:
- * dev Structure of device to be opened.
- *
- * This routine puts the driver and TLAN adapter in a
- * state where it is ready to send and receive packets.
- * It allocates the IRQ, resets and brings the adapter
- * out of reset, and allows interrupts. It also delays
- * the startup for autonegotiation or sends a Rx GO
- * command to the adapter, as appropriate.
- *
- **************************************************************/
+/***************************************************************
+ * tlan_open
+ *
+ * Returns:
+ * 0 on success, error code otherwise.
+ * Parms:
+ * dev Structure of device to be opened.
+ *
+ * This routine puts the driver and TLAN adapter in a
+ * state where it is ready to send and receive packets.
+ * It allocates the IRQ, resets and brings the adapter
+ * out of reset, and allows interrupts. It also delays
+ * the startup for autonegotiation or sends a Rx GO
+ * command to the adapter, as appropriate.
+ *
+ **************************************************************/
-static int TLan_Open( struct net_device *dev )
+static int tlan_open(struct net_device *dev)
{
- TLanPrivateInfo *priv = netdev_priv(dev);
+ struct tlan_priv *priv = netdev_priv(dev);
int err;
- priv->tlanRev = TLan_DioRead8( dev->base_addr, TLAN_DEF_REVISION );
- err = request_irq( dev->irq, TLan_HandleInterrupt, IRQF_SHARED,
- dev->name, dev );
+ priv->tlan_rev = tlan_dio_read8(dev->base_addr, TLAN_DEF_REVISION);
+ err = request_irq(dev->irq, tlan_handle_interrupt, IRQF_SHARED,
+ dev->name, dev);
- if ( err ) {
+ if (err) {
pr_err("TLAN: Cannot open %s because IRQ %d is already in use.\n",
- dev->name, dev->irq );
+ dev->name, dev->irq);
return err;
}
init_timer(&priv->timer);
- netif_start_queue(dev);
- /* NOTE: It might not be necessary to read the stats before a
- reset if you don't care what the values are.
- */
- TLan_ResetLists( dev );
- TLan_ReadAndClearStats( dev, TLAN_IGNORE );
- TLan_ResetAdapter( dev );
+ tlan_start(dev);
- TLAN_DBG( TLAN_DEBUG_GNRL, "%s: Opened. TLAN Chip Rev: %x\n",
- dev->name, priv->tlanRev );
+ TLAN_DBG(TLAN_DEBUG_GNRL, "%s: Opened. TLAN Chip Rev: %x\n",
+ dev->name, priv->tlan_rev);
return 0;
-} /* TLan_Open */
+}
- /**************************************************************
- * TLan_ioctl
- *
- * Returns:
- * 0 on success, error code otherwise
- * Params:
- * dev structure of device to receive ioctl.
- *
- * rq ifreq structure to hold userspace data.
- *
- * cmd ioctl command.
- *
- *
- *************************************************************/
+/**************************************************************
+ * tlan_ioctl
+ *
+ * Returns:
+ * 0 on success, error code otherwise
+ * Params:
+ * dev structure of device to receive ioctl.
+ *
+ * rq ifreq structure to hold userspace data.
+ *
+ * cmd ioctl command.
+ *
+ *
+ *************************************************************/
-static int TLan_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+static int tlan_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
{
- TLanPrivateInfo *priv = netdev_priv(dev);
+ struct tlan_priv *priv = netdev_priv(dev);
struct mii_ioctl_data *data = if_mii(rq);
- u32 phy = priv->phy[priv->phyNum];
+ u32 phy = priv->phy[priv->phy_num];
- if (!priv->phyOnline)
+ if (!priv->phy_online)
return -EAGAIN;
- switch(cmd) {
- case SIOCGMIIPHY: /* Get address of MII PHY in use. */
- data->phy_id = phy;
+ switch (cmd) {
+ case SIOCGMIIPHY: /* get address of MII PHY in use. */
+ data->phy_id = phy;
- case SIOCGMIIREG: /* Read MII PHY register. */
- TLan_MiiReadReg(dev, data->phy_id & 0x1f,
- data->reg_num & 0x1f, &data->val_out);
- return 0;
+ case SIOCGMIIREG: /* read MII PHY register. */
+ tlan_mii_read_reg(dev, data->phy_id & 0x1f,
+ data->reg_num & 0x1f, &data->val_out);
+ return 0;
- case SIOCSMIIREG: /* Write MII PHY register. */
- TLan_MiiWriteReg(dev, data->phy_id & 0x1f,
- data->reg_num & 0x1f, data->val_in);
- return 0;
- default:
- return -EOPNOTSUPP;
+ case SIOCSMIIREG: /* write MII PHY register. */
+ tlan_mii_write_reg(dev, data->phy_id & 0x1f,
+ data->reg_num & 0x1f, data->val_in);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
}
-} /* tlan_ioctl */
+}
- /***************************************************************
- * TLan_tx_timeout
- *
- * Returns: nothing
- *
- * Params:
- * dev structure of device which timed out
- * during transmit.
- *
- **************************************************************/
+/***************************************************************
+ * tlan_tx_timeout
+ *
+ * Returns: nothing
+ *
+ * Params:
+ * dev structure of device which timed out
+ * during transmit.
+ *
+ **************************************************************/
-static void TLan_tx_timeout(struct net_device *dev)
+static void tlan_tx_timeout(struct net_device *dev)
{
- TLAN_DBG( TLAN_DEBUG_GNRL, "%s: Transmit timed out.\n", dev->name);
+ TLAN_DBG(TLAN_DEBUG_GNRL, "%s: Transmit timed out.\n", dev->name);
/* Ok so we timed out, lets see what we can do about it...*/
- TLan_FreeLists( dev );
- TLan_ResetLists( dev );
- TLan_ReadAndClearStats( dev, TLAN_IGNORE );
- TLan_ResetAdapter( dev );
+ tlan_free_lists(dev);
+ tlan_reset_lists(dev);
+ tlan_read_and_clear_stats(dev, TLAN_IGNORE);
+ tlan_reset_adapter(dev);
dev->trans_start = jiffies; /* prevent tx timeout */
- netif_wake_queue( dev );
+ netif_wake_queue(dev);
}
- /***************************************************************
- * TLan_tx_timeout_work
- *
- * Returns: nothing
- *
- * Params:
- * work work item of device which timed out
- *
- **************************************************************/
+/***************************************************************
+ * tlan_tx_timeout_work
+ *
+ * Returns: nothing
+ *
+ * Params:
+ * work work item of device which timed out
+ *
+ **************************************************************/
-static void TLan_tx_timeout_work(struct work_struct *work)
+static void tlan_tx_timeout_work(struct work_struct *work)
{
- TLanPrivateInfo *priv =
- container_of(work, TLanPrivateInfo, tlan_tqueue);
+ struct tlan_priv *priv =
+ container_of(work, struct tlan_priv, tlan_tqueue);
- TLan_tx_timeout(priv->dev);
+ tlan_tx_timeout(priv->dev);
}
- /***************************************************************
- * TLan_StartTx
- *
- * Returns:
- * 0 on success, non-zero on failure.
- * Parms:
- * skb A pointer to the sk_buff containing the
- * frame to be sent.
- * dev The device to send the data on.
- *
- * This function adds a frame to the Tx list to be sent
- * ASAP. First it verifies that the adapter is ready and
- * there is room in the queue. Then it sets up the next
- * available list, copies the frame to the corresponding
- * buffer. If the adapter Tx channel is idle, it gives
- * the adapter a Tx Go command on the list, otherwise it
- * sets the forward address of the previous list to point
- * to this one. Then it frees the sk_buff.
- *
- **************************************************************/
+/***************************************************************
+ * tlan_start_tx
+ *
+ * Returns:
+ * 0 on success, non-zero on failure.
+ * Parms:
+ * skb A pointer to the sk_buff containing the
+ * frame to be sent.
+ * dev The device to send the data on.
+ *
+ * This function adds a frame to the Tx list to be sent
+ * ASAP. First it verifies that the adapter is ready and
+ * there is room in the queue. Then it sets up the next
+ * available list, copies the frame to the corresponding
+ * buffer. If the adapter Tx channel is idle, it gives
+ * the adapter a Tx Go command on the list, otherwise it
+ * sets the forward address of the previous list to point
+ * to this one. Then it frees the sk_buff.
+ *
+ **************************************************************/
-static netdev_tx_t TLan_StartTx( struct sk_buff *skb, struct net_device *dev )
+static netdev_tx_t tlan_start_tx(struct sk_buff *skb, struct net_device *dev)
{
- TLanPrivateInfo *priv = netdev_priv(dev);
+ struct tlan_priv *priv = netdev_priv(dev);
dma_addr_t tail_list_phys;
- TLanList *tail_list;
+ struct tlan_list *tail_list;
unsigned long flags;
unsigned int txlen;
- if ( ! priv->phyOnline ) {
- TLAN_DBG( TLAN_DEBUG_TX, "TRANSMIT: %s PHY is not ready\n",
- dev->name );
+ if (!priv->phy_online) {
+ TLAN_DBG(TLAN_DEBUG_TX, "TRANSMIT: %s PHY is not ready\n",
+ dev->name);
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
}
@@ -1100,218 +1178,214 @@
return NETDEV_TX_OK;
txlen = max(skb->len, (unsigned int)TLAN_MIN_FRAME_SIZE);
- tail_list = priv->txList + priv->txTail;
- tail_list_phys = priv->txListDMA + sizeof(TLanList) * priv->txTail;
+ tail_list = priv->tx_list + priv->tx_tail;
+ tail_list_phys =
+ priv->tx_list_dma + sizeof(struct tlan_list)*priv->tx_tail;
- if ( tail_list->cStat != TLAN_CSTAT_UNUSED ) {
- TLAN_DBG( TLAN_DEBUG_TX,
- "TRANSMIT: %s is busy (Head=%d Tail=%d)\n",
- dev->name, priv->txHead, priv->txTail );
+ if (tail_list->c_stat != TLAN_CSTAT_UNUSED) {
+ TLAN_DBG(TLAN_DEBUG_TX,
+ "TRANSMIT: %s is busy (Head=%d Tail=%d)\n",
+ dev->name, priv->tx_head, priv->tx_tail);
netif_stop_queue(dev);
- priv->txBusyCount++;
+ priv->tx_busy_count++;
return NETDEV_TX_BUSY;
}
tail_list->forward = 0;
- tail_list->buffer[0].address = pci_map_single(priv->pciDev,
+ tail_list->buffer[0].address = pci_map_single(priv->pci_dev,
skb->data, txlen,
PCI_DMA_TODEVICE);
- TLan_StoreSKB(tail_list, skb);
+ tlan_store_skb(tail_list, skb);
- tail_list->frameSize = (u16) txlen;
+ tail_list->frame_size = (u16) txlen;
tail_list->buffer[0].count = TLAN_LAST_BUFFER | (u32) txlen;
tail_list->buffer[1].count = 0;
tail_list->buffer[1].address = 0;
spin_lock_irqsave(&priv->lock, flags);
- tail_list->cStat = TLAN_CSTAT_READY;
- if ( ! priv->txInProgress ) {
- priv->txInProgress = 1;
- TLAN_DBG( TLAN_DEBUG_TX,
- "TRANSMIT: Starting TX on buffer %d\n", priv->txTail );
- outl( tail_list_phys, dev->base_addr + TLAN_CH_PARM );
- outl( TLAN_HC_GO, dev->base_addr + TLAN_HOST_CMD );
+ tail_list->c_stat = TLAN_CSTAT_READY;
+ if (!priv->tx_in_progress) {
+ priv->tx_in_progress = 1;
+ TLAN_DBG(TLAN_DEBUG_TX,
+ "TRANSMIT: Starting TX on buffer %d\n",
+ priv->tx_tail);
+ outl(tail_list_phys, dev->base_addr + TLAN_CH_PARM);
+ outl(TLAN_HC_GO, dev->base_addr + TLAN_HOST_CMD);
} else {
- TLAN_DBG( TLAN_DEBUG_TX, "TRANSMIT: Adding buffer %d to TX channel\n",
- priv->txTail );
- if ( priv->txTail == 0 ) {
- ( priv->txList + ( TLAN_NUM_TX_LISTS - 1 ) )->forward
+ TLAN_DBG(TLAN_DEBUG_TX,
+ "TRANSMIT: Adding buffer %d to TX channel\n",
+ priv->tx_tail);
+ if (priv->tx_tail == 0) {
+ (priv->tx_list + (TLAN_NUM_TX_LISTS - 1))->forward
= tail_list_phys;
} else {
- ( priv->txList + ( priv->txTail - 1 ) )->forward
+ (priv->tx_list + (priv->tx_tail - 1))->forward
= tail_list_phys;
}
}
spin_unlock_irqrestore(&priv->lock, flags);
- CIRC_INC( priv->txTail, TLAN_NUM_TX_LISTS );
+ CIRC_INC(priv->tx_tail, TLAN_NUM_TX_LISTS);
return NETDEV_TX_OK;
-} /* TLan_StartTx */
+}
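
CIRC_INC() advances tx_tail around the fixed-size list ring. Its definition lives in tlan.h, not in this hunk; assuming the usual increment-and-wrap shape, a standalone demo:

#include <stdio.h>

/* assumed shape of the tlan.h helper: bump the index, wrap at the limit */
#define CIRC_INC(a, b) do { if (++(a) >= (b)) (a) = 0; } while (0)

int main(void)
{
	int tail = 0, lists = 4;           /* e.g. TLAN_NUM_TX_LISTS */
	int i;

	for (i = 0; i < 6; i++) {
		CIRC_INC(tail, lists);
		printf("%d ", tail);       /* 1 2 3 0 1 2 */
	}
	printf("\n");
	return 0;
}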
- /***************************************************************
- * TLan_HandleInterrupt
- *
- * Returns:
- * Nothing
- * Parms:
- * irq The line on which the interrupt
- * occurred.
- * dev_id A pointer to the device assigned to
- * this irq line.
- *
- * This function handles an interrupt generated by its
- * assigned TLAN adapter. The function deactivates
- * interrupts on its adapter, records the type of
- * interrupt, executes the appropriate subhandler, and
- * acknowdges the interrupt to the adapter (thus
- * re-enabling adapter interrupts.
- *
- **************************************************************/
+/***************************************************************
+ * tlan_handle_interrupt
+ *
+ * Returns:
+ * Nothing
+ * Parms:
+ * irq The line on which the interrupt
+ * occurred.
+ * dev_id A pointer to the device assigned to
+ * this irq line.
+ *
+ * This function handles an interrupt generated by its
+ * assigned TLAN adapter. The function deactivates
+ * interrupts on its adapter, records the type of
+ * interrupt, executes the appropriate subhandler, and
+ * acknowledges the interrupt to the adapter (thus
+ * re-enabling adapter interrupts).
+ *
+ **************************************************************/
-static irqreturn_t TLan_HandleInterrupt(int irq, void *dev_id)
+static irqreturn_t tlan_handle_interrupt(int irq, void *dev_id)
{
struct net_device *dev = dev_id;
- TLanPrivateInfo *priv = netdev_priv(dev);
+ struct tlan_priv *priv = netdev_priv(dev);
u16 host_int;
u16 type;
spin_lock(&priv->lock);
- host_int = inw( dev->base_addr + TLAN_HOST_INT );
- type = ( host_int & TLAN_HI_IT_MASK ) >> 2;
- if ( type ) {
+ host_int = inw(dev->base_addr + TLAN_HOST_INT);
+ type = (host_int & TLAN_HI_IT_MASK) >> 2;
+ if (type) {
u32 ack;
u32 host_cmd;
- outw( host_int, dev->base_addr + TLAN_HOST_INT );
- ack = TLanIntVector[type]( dev, host_int );
+ outw(host_int, dev->base_addr + TLAN_HOST_INT);
+ ack = tlan_int_vector[type](dev, host_int);
- if ( ack ) {
- host_cmd = TLAN_HC_ACK | ack | ( type << 18 );
- outl( host_cmd, dev->base_addr + TLAN_HOST_CMD );
+ if (ack) {
+ host_cmd = TLAN_HC_ACK | ack | (type << 18);
+ outl(host_cmd, dev->base_addr + TLAN_HOST_CMD);
}
}
spin_unlock(&priv->lock);
return IRQ_RETVAL(type);
-} /* TLan_HandleInterrupts */
+}
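
The handler decodes an interrupt type from HOST_INT, indexes tlan_int_vector[], and folds a nonzero return into an ACK host command. A standalone model of the table dispatch (the 0x1c mask is an assumption for TLAN_HI_IT_MASK, giving types 0..7 for the 8-entry table):

#include <stdio.h>

typedef unsigned short u16;
typedef unsigned int u32;

static u32 handle_dummy(void *dev, u16 host_int)
{
	(void)dev; (void)host_int;
	return 1;                          /* ask for an ACK */
}

static u32 (*vec[8])(void *, u16) = { NULL, handle_dummy /* , ... */ };

int main(void)
{
	u16 host_int = 1 << 2;             /* type 1 in bits 2..4 */
	u16 type = (host_int & 0x1c) >> 2;

	if (type && vec[type]) {
		u32 ack = vec[type](NULL, host_int);

		if (ack)
			printf("cmd = ACK | %u | type<<18\n", ack);
	}
	return 0;
}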
- /***************************************************************
- * TLan_Close
- *
- * Returns:
- * An error code.
- * Parms:
- * dev The device structure of the device to
- * close.
- *
- * This function shuts down the adapter. It records any
- * stats, puts the adapter into reset state, deactivates
- * its time as needed, and frees the irq it is using.
- *
- **************************************************************/
+/***************************************************************
+ * tlan_close
+ *
+ * Returns:
+ * An error code.
+ * Parms:
+ * dev The device structure of the device to
+ * close.
+ *
+ * This function shuts down the adapter. It records any
+ * stats, puts the adapter into reset state, deactivates
+ * its timer as needed, and frees the irq it is using.
+ *
+ **************************************************************/
-static int TLan_Close(struct net_device *dev)
+static int tlan_close(struct net_device *dev)
{
- TLanPrivateInfo *priv = netdev_priv(dev);
+ struct tlan_priv *priv = netdev_priv(dev);
- netif_stop_queue(dev);
priv->neg_be_verbose = 0;
+ tlan_stop(dev);
- TLan_ReadAndClearStats( dev, TLAN_RECORD );
- outl( TLAN_HC_AD_RST, dev->base_addr + TLAN_HOST_CMD );
- if ( priv->timer.function != NULL ) {
- del_timer_sync( &priv->timer );
- priv->timer.function = NULL;
- }
-
- free_irq( dev->irq, dev );
- TLan_FreeLists( dev );
- TLAN_DBG( TLAN_DEBUG_GNRL, "Device %s closed.\n", dev->name );
+ free_irq(dev->irq, dev);
+ tlan_free_lists(dev);
+ TLAN_DBG(TLAN_DEBUG_GNRL, "Device %s closed.\n", dev->name);
return 0;
-} /* TLan_Close */
+}
- /***************************************************************
- * TLan_GetStats
- *
- * Returns:
- * A pointer to the device's statistics structure.
- * Parms:
- * dev The device structure to return the
- * stats for.
- *
- * This function updates the devices statistics by reading
- * the TLAN chip's onboard registers. Then it returns the
- * address of the statistics structure.
- *
- **************************************************************/
+/***************************************************************
+ * tlan_get_stats
+ *
+ * Returns:
+ * A pointer to the device's statistics structure.
+ * Parms:
+ * dev The device structure to return the
+ * stats for.
+ *
+ * This function updates the devices statistics by reading
+ * the TLAN chip's onboard registers. Then it returns the
+ * address of the statistics structure.
+ *
+ **************************************************************/
-static struct net_device_stats *TLan_GetStats( struct net_device *dev )
+static struct net_device_stats *tlan_get_stats(struct net_device *dev)
{
- TLanPrivateInfo *priv = netdev_priv(dev);
+ struct tlan_priv *priv = netdev_priv(dev);
int i;
/* Should only read stats if open ? */
- TLan_ReadAndClearStats( dev, TLAN_RECORD );
+ tlan_read_and_clear_stats(dev, TLAN_RECORD);
- TLAN_DBG( TLAN_DEBUG_RX, "RECEIVE: %s EOC count = %d\n", dev->name,
- priv->rxEocCount );
- TLAN_DBG( TLAN_DEBUG_TX, "TRANSMIT: %s Busy count = %d\n", dev->name,
- priv->txBusyCount );
- if ( debug & TLAN_DEBUG_GNRL ) {
- TLan_PrintDio( dev->base_addr );
- TLan_PhyPrint( dev );
+ TLAN_DBG(TLAN_DEBUG_RX, "RECEIVE: %s EOC count = %d\n", dev->name,
+ priv->rx_eoc_count);
+ TLAN_DBG(TLAN_DEBUG_TX, "TRANSMIT: %s Busy count = %d\n", dev->name,
+ priv->tx_busy_count);
+ if (debug & TLAN_DEBUG_GNRL) {
+ tlan_print_dio(dev->base_addr);
+ tlan_phy_print(dev);
}
- if ( debug & TLAN_DEBUG_LIST ) {
- for ( i = 0; i < TLAN_NUM_RX_LISTS; i++ )
- TLan_PrintList( priv->rxList + i, "RX", i );
- for ( i = 0; i < TLAN_NUM_TX_LISTS; i++ )
- TLan_PrintList( priv->txList + i, "TX", i );
+ if (debug & TLAN_DEBUG_LIST) {
+ for (i = 0; i < TLAN_NUM_RX_LISTS; i++)
+ tlan_print_list(priv->rx_list + i, "RX", i);
+ for (i = 0; i < TLAN_NUM_TX_LISTS; i++)
+ tlan_print_list(priv->tx_list + i, "TX", i);
}
return &dev->stats;
-} /* TLan_GetStats */
+}
- /***************************************************************
- * TLan_SetMulticastList
- *
- * Returns:
- * Nothing
- * Parms:
- * dev The device structure to set the
- * multicast list for.
- *
- * This function sets the TLAN adaptor to various receive
- * modes. If the IFF_PROMISC flag is set, promiscuous
- * mode is acitviated. Otherwise, promiscuous mode is
- * turned off. If the IFF_ALLMULTI flag is set, then
- * the hash table is set to receive all group addresses.
- * Otherwise, the first three multicast addresses are
- * stored in AREG_1-3, and the rest are selected via the
- * hash table, as necessary.
- *
- **************************************************************/
+/***************************************************************
+ * tlan_set_multicast_list
+ *
+ * Returns:
+ * Nothing
+ * Parms:
+ * dev The device structure to set the
+ * multicast list for.
+ *
+ * This function sets the TLAN adaptor to various receive
+ * modes. If the IFF_PROMISC flag is set, promiscuous
+ * mode is activated. Otherwise, promiscuous mode is
+ * turned off. If the IFF_ALLMULTI flag is set, then
+ * the hash table is set to receive all group addresses.
+ * Otherwise, the first three multicast addresses are
+ * stored in AREG_1-3, and the rest are selected via the
+ * hash table, as necessary.
+ *
+ **************************************************************/
-static void TLan_SetMulticastList( struct net_device *dev )
+static void tlan_set_multicast_list(struct net_device *dev)
{
struct netdev_hw_addr *ha;
u32 hash1 = 0;
@@ -1320,53 +1394,56 @@
u32 offset;
u8 tmp;
- if ( dev->flags & IFF_PROMISC ) {
- tmp = TLan_DioRead8( dev->base_addr, TLAN_NET_CMD );
- TLan_DioWrite8( dev->base_addr,
- TLAN_NET_CMD, tmp | TLAN_NET_CMD_CAF );
+ if (dev->flags & IFF_PROMISC) {
+ tmp = tlan_dio_read8(dev->base_addr, TLAN_NET_CMD);
+ tlan_dio_write8(dev->base_addr,
+ TLAN_NET_CMD, tmp | TLAN_NET_CMD_CAF);
} else {
- tmp = TLan_DioRead8( dev->base_addr, TLAN_NET_CMD );
- TLan_DioWrite8( dev->base_addr,
- TLAN_NET_CMD, tmp & ~TLAN_NET_CMD_CAF );
- if ( dev->flags & IFF_ALLMULTI ) {
- for ( i = 0; i < 3; i++ )
- TLan_SetMac( dev, i + 1, NULL );
- TLan_DioWrite32( dev->base_addr, TLAN_HASH_1, 0xFFFFFFFF );
- TLan_DioWrite32( dev->base_addr, TLAN_HASH_2, 0xFFFFFFFF );
+ tmp = tlan_dio_read8(dev->base_addr, TLAN_NET_CMD);
+ tlan_dio_write8(dev->base_addr,
+ TLAN_NET_CMD, tmp & ~TLAN_NET_CMD_CAF);
+ if (dev->flags & IFF_ALLMULTI) {
+ for (i = 0; i < 3; i++)
+ tlan_set_mac(dev, i + 1, NULL);
+ tlan_dio_write32(dev->base_addr, TLAN_HASH_1,
+ 0xffffffff);
+ tlan_dio_write32(dev->base_addr, TLAN_HASH_2,
+ 0xffffffff);
} else {
i = 0;
netdev_for_each_mc_addr(ha, dev) {
- if ( i < 3 ) {
- TLan_SetMac( dev, i + 1,
+ if (i < 3) {
+ tlan_set_mac(dev, i + 1,
(char *) &ha->addr);
} else {
- offset = TLan_HashFunc((u8 *)&ha->addr);
- if ( offset < 32 )
- hash1 |= ( 1 << offset );
+ offset =
+ tlan_hash_func((u8 *)&ha->addr);
+ if (offset < 32)
+ hash1 |= (1 << offset);
else
- hash2 |= ( 1 << ( offset - 32 ) );
+ hash2 |= (1 << (offset - 32));
}
i++;
}
- for ( ; i < 3; i++ )
- TLan_SetMac( dev, i + 1, NULL );
- TLan_DioWrite32( dev->base_addr, TLAN_HASH_1, hash1 );
- TLan_DioWrite32( dev->base_addr, TLAN_HASH_2, hash2 );
+ for ( ; i < 3; i++)
+ tlan_set_mac(dev, i + 1, NULL);
+ tlan_dio_write32(dev->base_addr, TLAN_HASH_1, hash1);
+ tlan_dio_write32(dev->base_addr, TLAN_HASH_2, hash2);
}
}
-} /* TLan_SetMulticastList */
+}
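For reference, the hash-register update in the loop above reduces each multicast address to a 6-bit offset and sets one bit across the two 32-bit HASH registers. A minimal sketch of that selection, assuming tlan_hash_func() returns a value in 0..63 as this driver's helper does (tlan_hash_mark is a hypothetical name, not part of the patch):

	/* Hypothetical helper mirroring the loop above: reduce one
	 * multicast address to a 6-bit offset and set the matching
	 * bit in the 64-bit hash split across TLAN_HASH_1/TLAN_HASH_2. */
	static void tlan_hash_mark(const u8 *addr, u32 *hash1, u32 *hash2)
	{
		u32 offset = tlan_hash_func(addr);	/* 0..63 */

		if (offset < 32)
			*hash1 |= 1 << offset;
		else
			*hash2 |= 1 << (offset - 32);
	}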
/*****************************************************************************
******************************************************************************
- ThunderLAN Driver Interrupt Vectors and Table
+ThunderLAN driver interrupt vectors and table
- Please see Chap. 4, "Interrupt Handling" of the "ThunderLAN
- Programmer's Guide" for more informations on handling interrupts
- generated by TLAN based adapters.
+Please see Chap. 4, "Interrupt Handling" of the "ThunderLAN
+Programmer's Guide" for more information on handling interrupts
+generated by TLAN-based adapters.
******************************************************************************
*****************************************************************************/
@@ -1374,46 +1451,48 @@
- /***************************************************************
- * TLan_HandleTxEOF
- *
- * Returns:
- * 1
- * Parms:
- * dev Device assigned the IRQ that was
- * raised.
- * host_int The contents of the HOST_INT
- * port.
- *
- * This function handles Tx EOF interrupts which are raised
- * by the adapter when it has completed sending the
- * contents of a buffer. If detemines which list/buffer
- * was completed and resets it. If the buffer was the last
- * in the channel (EOC), then the function checks to see if
- * another buffer is ready to send, and if so, sends a Tx
- * Go command. Finally, the driver activates/continues the
- * activity LED.
- *
- **************************************************************/
+/***************************************************************
+ * tlan_handle_tx_eof
+ *
+ * Returns:
+ * 1
+ * Parms:
+ * dev Device assigned the IRQ that was
+ * raised.
+ * host_int The contents of the HOST_INT
+ * port.
+ *
+ * This function handles Tx EOF interrupts which are raised
+ * by the adapter when it has completed sending the
+ * contents of a buffer.  It determines which list/buffer
+ * was completed and resets it. If the buffer was the last
+ * in the channel (EOC), then the function checks to see if
+ * another buffer is ready to send, and if so, sends a Tx
+ * Go command. Finally, the driver activates/continues the
+ * activity LED.
+ *
+ **************************************************************/
-static u32 TLan_HandleTxEOF( struct net_device *dev, u16 host_int )
+static u32 tlan_handle_tx_eof(struct net_device *dev, u16 host_int)
{
- TLanPrivateInfo *priv = netdev_priv(dev);
+ struct tlan_priv *priv = netdev_priv(dev);
int eoc = 0;
- TLanList *head_list;
+ struct tlan_list *head_list;
dma_addr_t head_list_phys;
u32 ack = 0;
- u16 tmpCStat;
+ u16 tmp_c_stat;
- TLAN_DBG( TLAN_DEBUG_TX, "TRANSMIT: Handling TX EOF (Head=%d Tail=%d)\n",
- priv->txHead, priv->txTail );
- head_list = priv->txList + priv->txHead;
+ TLAN_DBG(TLAN_DEBUG_TX,
+ "TRANSMIT: Handling TX EOF (Head=%d Tail=%d)\n",
+ priv->tx_head, priv->tx_tail);
+ head_list = priv->tx_list + priv->tx_head;
- while (((tmpCStat = head_list->cStat ) & TLAN_CSTAT_FRM_CMP) && (ack < 255)) {
- struct sk_buff *skb = TLan_GetSKB(head_list);
+ while (((tmp_c_stat = head_list->c_stat) & TLAN_CSTAT_FRM_CMP)
+ && (ack < 255)) {
+ struct sk_buff *skb = tlan_get_skb(head_list);
ack++;
- pci_unmap_single(priv->pciDev, head_list->buffer[0].address,
+ pci_unmap_single(priv->pci_dev, head_list->buffer[0].address,
max(skb->len,
(unsigned int)TLAN_MIN_FRAME_SIZE),
PCI_DMA_TODEVICE);
@@ -1421,304 +1500,311 @@
head_list->buffer[8].address = 0;
head_list->buffer[9].address = 0;
- if ( tmpCStat & TLAN_CSTAT_EOC )
+ if (tmp_c_stat & TLAN_CSTAT_EOC)
eoc = 1;
- dev->stats.tx_bytes += head_list->frameSize;
+ dev->stats.tx_bytes += head_list->frame_size;
- head_list->cStat = TLAN_CSTAT_UNUSED;
+ head_list->c_stat = TLAN_CSTAT_UNUSED;
netif_start_queue(dev);
- CIRC_INC( priv->txHead, TLAN_NUM_TX_LISTS );
- head_list = priv->txList + priv->txHead;
+ CIRC_INC(priv->tx_head, TLAN_NUM_TX_LISTS);
+ head_list = priv->tx_list + priv->tx_head;
}
if (!ack)
- printk(KERN_INFO "TLAN: Received interrupt for uncompleted TX frame.\n");
+ printk(KERN_INFO
+ "TLAN: Received interrupt for uncompleted TX frame.\n");
- if ( eoc ) {
- TLAN_DBG( TLAN_DEBUG_TX,
- "TRANSMIT: Handling TX EOC (Head=%d Tail=%d)\n",
- priv->txHead, priv->txTail );
- head_list = priv->txList + priv->txHead;
- head_list_phys = priv->txListDMA + sizeof(TLanList) * priv->txHead;
- if ( ( head_list->cStat & TLAN_CSTAT_READY ) == TLAN_CSTAT_READY ) {
- outl(head_list_phys, dev->base_addr + TLAN_CH_PARM );
+ if (eoc) {
+ TLAN_DBG(TLAN_DEBUG_TX,
+ "TRANSMIT: handling TX EOC (Head=%d Tail=%d)\n",
+ priv->tx_head, priv->tx_tail);
+ head_list = priv->tx_list + priv->tx_head;
+ head_list_phys = priv->tx_list_dma
+ + sizeof(struct tlan_list)*priv->tx_head;
+ if (head_list->c_stat & TLAN_CSTAT_READY) {
+ outl(head_list_phys, dev->base_addr + TLAN_CH_PARM);
ack |= TLAN_HC_GO;
} else {
- priv->txInProgress = 0;
+ priv->tx_in_progress = 0;
}
}
- if ( priv->adapter->flags & TLAN_ADAPTER_ACTIVITY_LED ) {
- TLan_DioWrite8( dev->base_addr,
- TLAN_LED_REG, TLAN_LED_LINK | TLAN_LED_ACT );
- if ( priv->timer.function == NULL ) {
- priv->timer.function = TLan_Timer;
- priv->timer.data = (unsigned long) dev;
- priv->timer.expires = jiffies + TLAN_TIMER_ACT_DELAY;
- priv->timerSetAt = jiffies;
- priv->timerType = TLAN_TIMER_ACTIVITY;
- add_timer(&priv->timer);
- } else if ( priv->timerType == TLAN_TIMER_ACTIVITY ) {
- priv->timerSetAt = jiffies;
+ if (priv->adapter->flags & TLAN_ADAPTER_ACTIVITY_LED) {
+ tlan_dio_write8(dev->base_addr,
+ TLAN_LED_REG, TLAN_LED_LINK | TLAN_LED_ACT);
+ if (priv->timer.function == NULL) {
+ priv->timer.function = tlan_timer;
+ priv->timer.data = (unsigned long) dev;
+ priv->timer.expires = jiffies + TLAN_TIMER_ACT_DELAY;
+ priv->timer_set_at = jiffies;
+ priv->timer_type = TLAN_TIMER_ACTIVITY;
+ add_timer(&priv->timer);
+ } else if (priv->timer_type == TLAN_TIMER_ACTIVITY) {
+ priv->timer_set_at = jiffies;
}
}
return ack;
-} /* TLan_HandleTxEOF */
+}
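Both ring indices above advance through CIRC_INC(), which amounts to a wrap-around increment. A sketch of the macro as defined in tlan.h (hedged in case the exact definition differs):

	/* Wrap-around ring-index increment, as used for tx_head and
	 * rx_head above: step the index, fold back to zero at the end. */
	#define CIRC_INC(a, b)	if (++(a) >= (b)) (a) = 0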
- /***************************************************************
- * TLan_HandleStatOverflow
- *
- * Returns:
- * 1
- * Parms:
- * dev Device assigned the IRQ that was
- * raised.
- * host_int The contents of the HOST_INT
- * port.
- *
- * This function handles the Statistics Overflow interrupt
- * which means that one or more of the TLAN statistics
- * registers has reached 1/2 capacity and needs to be read.
- *
- **************************************************************/
+/***************************************************************
+ * tlan_handle_stat_overflow
+ *
+ * Returns:
+ * 1
+ * Parms:
+ * dev Device assigned the IRQ that was
+ * raised.
+ * host_int The contents of the HOST_INT
+ * port.
+ *
+ * This function handles the Statistics Overflow interrupt
+ * which means that one or more of the TLAN statistics
+ * registers has reached 1/2 capacity and needs to be read.
+ *
+ **************************************************************/
-static u32 TLan_HandleStatOverflow( struct net_device *dev, u16 host_int )
+static u32 tlan_handle_stat_overflow(struct net_device *dev, u16 host_int)
{
- TLan_ReadAndClearStats( dev, TLAN_RECORD );
+ tlan_read_and_clear_stats(dev, TLAN_RECORD);
return 1;
-} /* TLan_HandleStatOverflow */
+}
- /***************************************************************
- * TLan_HandleRxEOF
- *
- * Returns:
- * 1
- * Parms:
- * dev Device assigned the IRQ that was
- * raised.
- * host_int The contents of the HOST_INT
- * port.
- *
- * This function handles the Rx EOF interrupt which
- * indicates a frame has been received by the adapter from
- * the net and the frame has been transferred to memory.
- * The function determines the bounce buffer the frame has
- * been loaded into, creates a new sk_buff big enough to
- * hold the frame, and sends it to protocol stack. It
- * then resets the used buffer and appends it to the end
- * of the list. If the frame was the last in the Rx
- * channel (EOC), the function restarts the receive channel
- * by sending an Rx Go command to the adapter. Then it
- * activates/continues the activity LED.
- *
- **************************************************************/
+/***************************************************************
+ * tlan_handle_rx_eof
+ *
+ * Returns:
+ * 1
+ * Parms:
+ * dev Device assigned the IRQ that was
+ * raised.
+ * host_int The contents of the HOST_INT
+ * port.
+ *
+ * This function handles the Rx EOF interrupt which
+ * indicates a frame has been received by the adapter from
+ * the net and the frame has been transferred to memory.
+ * The function determines the bounce buffer the frame has
+ * been loaded into, creates a new sk_buff big enough to
+ * hold the frame, and sends it to protocol stack. It
+ * then resets the used buffer and appends it to the end
+ * of the list. If the frame was the last in the Rx
+ * channel (EOC), the function restarts the receive channel
+ * by sending an Rx Go command to the adapter. Then it
+ * activates/continues the activity LED.
+ *
+ **************************************************************/
-static u32 TLan_HandleRxEOF( struct net_device *dev, u16 host_int )
+static u32 tlan_handle_rx_eof(struct net_device *dev, u16 host_int)
{
- TLanPrivateInfo *priv = netdev_priv(dev);
+ struct tlan_priv *priv = netdev_priv(dev);
u32 ack = 0;
int eoc = 0;
- TLanList *head_list;
+ struct tlan_list *head_list;
struct sk_buff *skb;
- TLanList *tail_list;
- u16 tmpCStat;
+ struct tlan_list *tail_list;
+ u16 tmp_c_stat;
dma_addr_t head_list_phys;
- TLAN_DBG( TLAN_DEBUG_RX, "RECEIVE: Handling RX EOF (Head=%d Tail=%d)\n",
- priv->rxHead, priv->rxTail );
- head_list = priv->rxList + priv->rxHead;
- head_list_phys = priv->rxListDMA + sizeof(TLanList) * priv->rxHead;
+ TLAN_DBG(TLAN_DEBUG_RX, "RECEIVE: handling RX EOF (Head=%d Tail=%d)\n",
+ priv->rx_head, priv->rx_tail);
+ head_list = priv->rx_list + priv->rx_head;
+ head_list_phys =
+ priv->rx_list_dma + sizeof(struct tlan_list)*priv->rx_head;
- while (((tmpCStat = head_list->cStat) & TLAN_CSTAT_FRM_CMP) && (ack < 255)) {
- dma_addr_t frameDma = head_list->buffer[0].address;
- u32 frameSize = head_list->frameSize;
+ while (((tmp_c_stat = head_list->c_stat) & TLAN_CSTAT_FRM_CMP)
+ && (ack < 255)) {
+ dma_addr_t frame_dma = head_list->buffer[0].address;
+ u32 frame_size = head_list->frame_size;
struct sk_buff *new_skb;
ack++;
- if (tmpCStat & TLAN_CSTAT_EOC)
+ if (tmp_c_stat & TLAN_CSTAT_EOC)
eoc = 1;
new_skb = netdev_alloc_skb_ip_align(dev,
TLAN_MAX_FRAME_SIZE + 5);
- if ( !new_skb )
+ if (!new_skb)
goto drop_and_reuse;
- skb = TLan_GetSKB(head_list);
- pci_unmap_single(priv->pciDev, frameDma,
+ skb = tlan_get_skb(head_list);
+ pci_unmap_single(priv->pci_dev, frame_dma,
TLAN_MAX_FRAME_SIZE, PCI_DMA_FROMDEVICE);
- skb_put( skb, frameSize );
+ skb_put(skb, frame_size);
- dev->stats.rx_bytes += frameSize;
+ dev->stats.rx_bytes += frame_size;
- skb->protocol = eth_type_trans( skb, dev );
- netif_rx( skb );
+ skb->protocol = eth_type_trans(skb, dev);
+ netif_rx(skb);
- head_list->buffer[0].address = pci_map_single(priv->pciDev,
- new_skb->data,
- TLAN_MAX_FRAME_SIZE,
- PCI_DMA_FROMDEVICE);
+ head_list->buffer[0].address =
+ pci_map_single(priv->pci_dev, new_skb->data,
+ TLAN_MAX_FRAME_SIZE, PCI_DMA_FROMDEVICE);
- TLan_StoreSKB(head_list, new_skb);
+ tlan_store_skb(head_list, new_skb);
drop_and_reuse:
head_list->forward = 0;
- head_list->cStat = 0;
- tail_list = priv->rxList + priv->rxTail;
+ head_list->c_stat = 0;
+ tail_list = priv->rx_list + priv->rx_tail;
tail_list->forward = head_list_phys;
- CIRC_INC( priv->rxHead, TLAN_NUM_RX_LISTS );
- CIRC_INC( priv->rxTail, TLAN_NUM_RX_LISTS );
- head_list = priv->rxList + priv->rxHead;
- head_list_phys = priv->rxListDMA + sizeof(TLanList) * priv->rxHead;
+ CIRC_INC(priv->rx_head, TLAN_NUM_RX_LISTS);
+ CIRC_INC(priv->rx_tail, TLAN_NUM_RX_LISTS);
+ head_list = priv->rx_list + priv->rx_head;
+ head_list_phys = priv->rx_list_dma
+ + sizeof(struct tlan_list)*priv->rx_head;
}
if (!ack)
- printk(KERN_INFO "TLAN: Received interrupt for uncompleted RX frame.\n");
+ printk(KERN_INFO
+ "TLAN: Received interrupt for uncompleted RX frame.\n");
- if ( eoc ) {
- TLAN_DBG( TLAN_DEBUG_RX,
- "RECEIVE: Handling RX EOC (Head=%d Tail=%d)\n",
- priv->rxHead, priv->rxTail );
- head_list = priv->rxList + priv->rxHead;
- head_list_phys = priv->rxListDMA + sizeof(TLanList) * priv->rxHead;
- outl(head_list_phys, dev->base_addr + TLAN_CH_PARM );
+ if (eoc) {
+ TLAN_DBG(TLAN_DEBUG_RX,
+ "RECEIVE: handling RX EOC (Head=%d Tail=%d)\n",
+ priv->rx_head, priv->rx_tail);
+ head_list = priv->rx_list + priv->rx_head;
+ head_list_phys = priv->rx_list_dma
+ + sizeof(struct tlan_list)*priv->rx_head;
+ outl(head_list_phys, dev->base_addr + TLAN_CH_PARM);
ack |= TLAN_HC_GO | TLAN_HC_RT;
- priv->rxEocCount++;
+ priv->rx_eoc_count++;
}
- if ( priv->adapter->flags & TLAN_ADAPTER_ACTIVITY_LED ) {
- TLan_DioWrite8( dev->base_addr,
- TLAN_LED_REG, TLAN_LED_LINK | TLAN_LED_ACT );
- if ( priv->timer.function == NULL ) {
- priv->timer.function = TLan_Timer;
+ if (priv->adapter->flags & TLAN_ADAPTER_ACTIVITY_LED) {
+ tlan_dio_write8(dev->base_addr,
+ TLAN_LED_REG, TLAN_LED_LINK | TLAN_LED_ACT);
+ if (priv->timer.function == NULL) {
+ priv->timer.function = tlan_timer;
priv->timer.data = (unsigned long) dev;
priv->timer.expires = jiffies + TLAN_TIMER_ACT_DELAY;
- priv->timerSetAt = jiffies;
- priv->timerType = TLAN_TIMER_ACTIVITY;
+ priv->timer_set_at = jiffies;
+ priv->timer_type = TLAN_TIMER_ACTIVITY;
add_timer(&priv->timer);
- } else if ( priv->timerType == TLAN_TIMER_ACTIVITY ) {
- priv->timerSetAt = jiffies;
+ } else if (priv->timer_type == TLAN_TIMER_ACTIVITY) {
+ priv->timer_set_at = jiffies;
}
}
return ack;
-} /* TLan_HandleRxEOF */
+}
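The tlan_store_skb()/tlan_get_skb() pair used above stashes the sk_buff pointer in the otherwise unused tail of the list's buffer array, which is why the completion paths clear buffer[8] and buffer[9]. A sketch of that pairing, assuming the slot-8/slot-9 layout the zeroing above suggests:

	/* Sketch: park the skb pointer in buffer slots 8 and 9 so the
	 * IRQ handler can recover the skb that owns each DMA buffer. */
	static inline void tlan_store_skb(struct tlan_list *tag,
					  struct sk_buff *skb)
	{
		unsigned long addr = (unsigned long) skb;

		tag->buffer[9].address = addr;
		tag->buffer[8].address = upper_32_bits(addr);
	}

	static inline struct sk_buff *tlan_get_skb(const struct tlan_list *tag)
	{
		unsigned long addr = tag->buffer[9].address;

		/* double 16-bit shift avoids undefined behavior on 32-bit */
		addr |= ((unsigned long) tag->buffer[8].address << 16) << 16;
		return (struct sk_buff *) addr;
	}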
- /***************************************************************
- * TLan_HandleDummy
- *
- * Returns:
- * 1
- * Parms:
- * dev Device assigned the IRQ that was
- * raised.
- * host_int The contents of the HOST_INT
- * port.
- *
- * This function handles the Dummy interrupt, which is
- * raised whenever a test interrupt is generated by setting
- * the Req_Int bit of HOST_CMD to 1.
- *
- **************************************************************/
+/***************************************************************
+ * tlan_handle_dummy
+ *
+ * Returns:
+ * 1
+ * Parms:
+ * dev Device assigned the IRQ that was
+ * raised.
+ * host_int The contents of the HOST_INT
+ * port.
+ *
+ * This function handles the Dummy interrupt, which is
+ * raised whenever a test interrupt is generated by setting
+ * the Req_Int bit of HOST_CMD to 1.
+ *
+ **************************************************************/
-static u32 TLan_HandleDummy( struct net_device *dev, u16 host_int )
+static u32 tlan_handle_dummy(struct net_device *dev, u16 host_int)
{
- printk( "TLAN: Test interrupt on %s.\n", dev->name );
+ pr_info("TLAN: Test interrupt on %s.\n", dev->name);
return 1;
-} /* TLan_HandleDummy */
+}
- /***************************************************************
- * TLan_HandleTxEOC
- *
- * Returns:
- * 1
- * Parms:
- * dev Device assigned the IRQ that was
- * raised.
- * host_int The contents of the HOST_INT
- * port.
- *
- * This driver is structured to determine EOC occurrences by
- * reading the CSTAT member of the list structure. Tx EOC
- * interrupts are disabled via the DIO INTDIS register.
- * However, TLAN chips before revision 3.0 didn't have this
- * functionality, so process EOC events if this is the
- * case.
- *
- **************************************************************/
+/***************************************************************
+ * tlan_handle_tx_eoc
+ *
+ * Returns:
+ * 1
+ * Parms:
+ * dev Device assigned the IRQ that was
+ * raised.
+ * host_int The contents of the HOST_INT
+ * port.
+ *
+ * This driver is structured to determine EOC occurrences by
+ * reading the CSTAT member of the list structure. Tx EOC
+ * interrupts are disabled via the DIO INTDIS register.
+ * However, TLAN chips before revision 3.0 didn't have this
+ * functionality, so process EOC events if this is the
+ * case.
+ *
+ **************************************************************/
-static u32 TLan_HandleTxEOC( struct net_device *dev, u16 host_int )
+static u32 tlan_handle_tx_eoc(struct net_device *dev, u16 host_int)
{
- TLanPrivateInfo *priv = netdev_priv(dev);
- TLanList *head_list;
+ struct tlan_priv *priv = netdev_priv(dev);
+ struct tlan_list *head_list;
dma_addr_t head_list_phys;
u32 ack = 1;
host_int = 0;
- if ( priv->tlanRev < 0x30 ) {
- TLAN_DBG( TLAN_DEBUG_TX,
- "TRANSMIT: Handling TX EOC (Head=%d Tail=%d) -- IRQ\n",
- priv->txHead, priv->txTail );
- head_list = priv->txList + priv->txHead;
- head_list_phys = priv->txListDMA + sizeof(TLanList) * priv->txHead;
- if ( ( head_list->cStat & TLAN_CSTAT_READY ) == TLAN_CSTAT_READY ) {
+ if (priv->tlan_rev < 0x30) {
+ TLAN_DBG(TLAN_DEBUG_TX,
+ "TRANSMIT: handling TX EOC (Head=%d Tail=%d) -- IRQ\n",
+ priv->tx_head, priv->tx_tail);
+ head_list = priv->tx_list + priv->tx_head;
+ head_list_phys = priv->tx_list_dma
+ + sizeof(struct tlan_list)*priv->tx_head;
+ if (head_list->c_stat & TLAN_CSTAT_READY) {
netif_stop_queue(dev);
- outl( head_list_phys, dev->base_addr + TLAN_CH_PARM );
+ outl(head_list_phys, dev->base_addr + TLAN_CH_PARM);
ack |= TLAN_HC_GO;
} else {
- priv->txInProgress = 0;
+ priv->tx_in_progress = 0;
}
}
return ack;
-} /* TLan_HandleTxEOC */
+}
- /***************************************************************
- * TLan_HandleStatusCheck
- *
- * Returns:
- * 0 if Adapter check, 1 if Network Status check.
- * Parms:
- * dev Device assigned the IRQ that was
- * raised.
- * host_int The contents of the HOST_INT
- * port.
- *
- * This function handles Adapter Check/Network Status
- * interrupts generated by the adapter. It checks the
- * vector in the HOST_INT register to determine if it is
- * an Adapter Check interrupt. If so, it resets the
- * adapter. Otherwise it clears the status registers
- * and services the PHY.
- *
- **************************************************************/
+/***************************************************************
+ * tlan_handle_status_check
+ *
+ * Returns:
+ * 0 if Adapter check, 1 if Network Status check.
+ * Parms:
+ * dev Device assigned the IRQ that was
+ * raised.
+ * host_int The contents of the HOST_INT
+ * port.
+ *
+ * This function handles Adapter Check/Network Status
+ * interrupts generated by the adapter. It checks the
+ * vector in the HOST_INT register to determine if it is
+ * an Adapter Check interrupt. If so, it resets the
+ * adapter. Otherwise it clears the status registers
+ * and services the PHY.
+ *
+ **************************************************************/
-static u32 TLan_HandleStatusCheck( struct net_device *dev, u16 host_int )
+static u32 tlan_handle_status_check(struct net_device *dev, u16 host_int)
{
- TLanPrivateInfo *priv = netdev_priv(dev);
+ struct tlan_priv *priv = netdev_priv(dev);
u32 ack;
u32 error;
u8 net_sts;
@@ -1727,92 +1813,94 @@
u16 tlphy_sts;
ack = 1;
- if ( host_int & TLAN_HI_IV_MASK ) {
- netif_stop_queue( dev );
- error = inl( dev->base_addr + TLAN_CH_PARM );
- printk( "TLAN: %s: Adaptor Error = 0x%x\n", dev->name, error );
- TLan_ReadAndClearStats( dev, TLAN_RECORD );
- outl( TLAN_HC_AD_RST, dev->base_addr + TLAN_HOST_CMD );
+ if (host_int & TLAN_HI_IV_MASK) {
+ netif_stop_queue(dev);
+ error = inl(dev->base_addr + TLAN_CH_PARM);
+ pr_info("TLAN: %s: Adaptor Error = 0x%x\n", dev->name, error);
+ tlan_read_and_clear_stats(dev, TLAN_RECORD);
+ outl(TLAN_HC_AD_RST, dev->base_addr + TLAN_HOST_CMD);
schedule_work(&priv->tlan_tqueue);
netif_wake_queue(dev);
ack = 0;
} else {
- TLAN_DBG( TLAN_DEBUG_GNRL, "%s: Status Check\n", dev->name );
- phy = priv->phy[priv->phyNum];
+ TLAN_DBG(TLAN_DEBUG_GNRL, "%s: Status Check\n", dev->name);
+ phy = priv->phy[priv->phy_num];
- net_sts = TLan_DioRead8( dev->base_addr, TLAN_NET_STS );
- if ( net_sts ) {
- TLan_DioWrite8( dev->base_addr, TLAN_NET_STS, net_sts );
- TLAN_DBG( TLAN_DEBUG_GNRL, "%s: Net_Sts = %x\n",
- dev->name, (unsigned) net_sts );
+ net_sts = tlan_dio_read8(dev->base_addr, TLAN_NET_STS);
+ if (net_sts) {
+ tlan_dio_write8(dev->base_addr, TLAN_NET_STS, net_sts);
+ TLAN_DBG(TLAN_DEBUG_GNRL, "%s: Net_Sts = %x\n",
+ dev->name, (unsigned) net_sts);
}
- if ( ( net_sts & TLAN_NET_STS_MIRQ ) && ( priv->phyNum == 0 ) ) {
- TLan_MiiReadReg( dev, phy, TLAN_TLPHY_STS, &tlphy_sts );
- TLan_MiiReadReg( dev, phy, TLAN_TLPHY_CTL, &tlphy_ctl );
- if ( ! ( tlphy_sts & TLAN_TS_POLOK ) &&
- ! ( tlphy_ctl & TLAN_TC_SWAPOL ) ) {
- tlphy_ctl |= TLAN_TC_SWAPOL;
- TLan_MiiWriteReg( dev, phy, TLAN_TLPHY_CTL, tlphy_ctl);
- } else if ( ( tlphy_sts & TLAN_TS_POLOK ) &&
- ( tlphy_ctl & TLAN_TC_SWAPOL ) ) {
- tlphy_ctl &= ~TLAN_TC_SWAPOL;
- TLan_MiiWriteReg( dev, phy, TLAN_TLPHY_CTL, tlphy_ctl);
- }
-
- if (debug) {
- TLan_PhyPrint( dev );
+ if ((net_sts & TLAN_NET_STS_MIRQ) && (priv->phy_num == 0)) {
+ tlan_mii_read_reg(dev, phy, TLAN_TLPHY_STS, &tlphy_sts);
+ tlan_mii_read_reg(dev, phy, TLAN_TLPHY_CTL, &tlphy_ctl);
+ if (!(tlphy_sts & TLAN_TS_POLOK) &&
+ !(tlphy_ctl & TLAN_TC_SWAPOL)) {
+ tlphy_ctl |= TLAN_TC_SWAPOL;
+ tlan_mii_write_reg(dev, phy, TLAN_TLPHY_CTL,
+ tlphy_ctl);
+ } else if ((tlphy_sts & TLAN_TS_POLOK) &&
+ (tlphy_ctl & TLAN_TC_SWAPOL)) {
+ tlphy_ctl &= ~TLAN_TC_SWAPOL;
+ tlan_mii_write_reg(dev, phy, TLAN_TLPHY_CTL,
+ tlphy_ctl);
}
+
+ if (debug)
+ tlan_phy_print(dev);
}
}
return ack;
-} /* TLan_HandleStatusCheck */
+}
- /***************************************************************
- * TLan_HandleRxEOC
- *
- * Returns:
- * 1
- * Parms:
- * dev Device assigned the IRQ that was
- * raised.
- * host_int The contents of the HOST_INT
- * port.
- *
- * This driver is structured to determine EOC occurrences by
- * reading the CSTAT member of the list structure. Rx EOC
- * interrupts are disabled via the DIO INTDIS register.
- * However, TLAN chips before revision 3.0 didn't have this
- * CSTAT member or a INTDIS register, so if this chip is
- * pre-3.0, process EOC interrupts normally.
- *
- **************************************************************/
+/***************************************************************
+ * tlan_handle_rx_eoc
+ *
+ * Returns:
+ * 1
+ * Parms:
+ * dev Device assigned the IRQ that was
+ * raised.
+ * host_int The contents of the HOST_INT
+ * port.
+ *
+ * This driver is structured to determine EOC occurrences by
+ * reading the CSTAT member of the list structure. Rx EOC
+ * interrupts are disabled via the DIO INTDIS register.
+ * However, TLAN chips before revision 3.0 didn't have this
+ * CSTAT member or a INTDIS register, so if this chip is
+ * pre-3.0, process EOC interrupts normally.
+ *
+ **************************************************************/
-static u32 TLan_HandleRxEOC( struct net_device *dev, u16 host_int )
+static u32 tlan_handle_rx_eoc(struct net_device *dev, u16 host_int)
{
- TLanPrivateInfo *priv = netdev_priv(dev);
+ struct tlan_priv *priv = netdev_priv(dev);
dma_addr_t head_list_phys;
u32 ack = 1;
- if ( priv->tlanRev < 0x30 ) {
- TLAN_DBG( TLAN_DEBUG_RX,
- "RECEIVE: Handling RX EOC (Head=%d Tail=%d) -- IRQ\n",
- priv->rxHead, priv->rxTail );
- head_list_phys = priv->rxListDMA + sizeof(TLanList) * priv->rxHead;
- outl( head_list_phys, dev->base_addr + TLAN_CH_PARM );
+ if (priv->tlan_rev < 0x30) {
+ TLAN_DBG(TLAN_DEBUG_RX,
+ "RECEIVE: Handling RX EOC (head=%d tail=%d) -- IRQ\n",
+ priv->rx_head, priv->rx_tail);
+ head_list_phys = priv->rx_list_dma
+ + sizeof(struct tlan_list)*priv->rx_head;
+ outl(head_list_phys, dev->base_addr + TLAN_CH_PARM);
ack |= TLAN_HC_GO | TLAN_HC_RT;
- priv->rxEocCount++;
+ priv->rx_eoc_count++;
}
return ack;
-} /* TLan_HandleRxEOC */
+}
@@ -1820,98 +1908,98 @@
/*****************************************************************************
******************************************************************************
- ThunderLAN Driver Timer Function
+ThunderLAN driver timer function
******************************************************************************
*****************************************************************************/
- /***************************************************************
- * TLan_Timer
- *
- * Returns:
- * Nothing
- * Parms:
- * data A value given to add timer when
- * add_timer was called.
- *
- * This function handles timed functionality for the
- * TLAN driver. The two current timer uses are for
- * delaying for autonegotionation and driving the ACT LED.
- * - Autonegotiation requires being allowed about
- * 2 1/2 seconds before attempting to transmit a
- * packet. It would be a very bad thing to hang
- * the kernel this long, so the driver doesn't
- * allow transmission 'til after this time, for
- * certain PHYs. It would be much nicer if all
- * PHYs were interrupt-capable like the internal
- * PHY.
- * - The ACT LED, which shows adapter activity, is
- * driven by the driver, and so must be left on
- * for a short period to power up the LED so it
- * can be seen. This delay can be changed by
- * changing the TLAN_TIMER_ACT_DELAY in tlan.h,
- * if desired. 100 ms produces a slightly
- * sluggish response.
- *
- **************************************************************/
+/***************************************************************
+ * tlan_timer
+ *
+ * Returns:
+ * Nothing
+ * Parms:
+ * data A value given to add_timer when
+ * add_timer was called.
+ *
+ * This function handles timed functionality for the
+ * TLAN driver. The two current timer uses are for
+ * delaying for autonegotiation and driving the ACT LED.
+ * - Autonegotiation requires being allowed about
+ * 2 1/2 seconds before attempting to transmit a
+ * packet. It would be a very bad thing to hang
+ * the kernel this long, so the driver doesn't
+ * allow transmission 'til after this time, for
+ * certain PHYs. It would be much nicer if all
+ * PHYs were interrupt-capable like the internal
+ * PHY.
+ * - The ACT LED, which shows adapter activity, is
+ * driven by the driver, and so must be left on
+ * for a short period to power up the LED so it
+ * can be seen. This delay can be changed by
+ * changing the TLAN_TIMER_ACT_DELAY in tlan.h,
+ * if desired. 100 ms produces a slightly
+ * sluggish response.
+ *
+ **************************************************************/
-static void TLan_Timer( unsigned long data )
+static void tlan_timer(unsigned long data)
{
struct net_device *dev = (struct net_device *) data;
- TLanPrivateInfo *priv = netdev_priv(dev);
+ struct tlan_priv *priv = netdev_priv(dev);
u32 elapsed;
unsigned long flags = 0;
priv->timer.function = NULL;
- switch ( priv->timerType ) {
+ switch (priv->timer_type) {
#ifdef MONITOR
- case TLAN_TIMER_LINK_BEAT:
- TLan_PhyMonitor( dev );
- break;
+ case TLAN_TIMER_LINK_BEAT:
+ tlan_phy_monitor(dev);
+ break;
#endif
- case TLAN_TIMER_PHY_PDOWN:
- TLan_PhyPowerDown( dev );
- break;
- case TLAN_TIMER_PHY_PUP:
- TLan_PhyPowerUp( dev );
- break;
- case TLAN_TIMER_PHY_RESET:
- TLan_PhyReset( dev );
- break;
- case TLAN_TIMER_PHY_START_LINK:
- TLan_PhyStartLink( dev );
- break;
- case TLAN_TIMER_PHY_FINISH_AN:
- TLan_PhyFinishAutoNeg( dev );
- break;
- case TLAN_TIMER_FINISH_RESET:
- TLan_FinishReset( dev );
- break;
- case TLAN_TIMER_ACTIVITY:
- spin_lock_irqsave(&priv->lock, flags);
- if ( priv->timer.function == NULL ) {
- elapsed = jiffies - priv->timerSetAt;
- if ( elapsed >= TLAN_TIMER_ACT_DELAY ) {
- TLan_DioWrite8( dev->base_addr,
- TLAN_LED_REG, TLAN_LED_LINK );
- } else {
- priv->timer.function = TLan_Timer;
- priv->timer.expires = priv->timerSetAt
- + TLAN_TIMER_ACT_DELAY;
- spin_unlock_irqrestore(&priv->lock, flags);
- add_timer( &priv->timer );
- break;
- }
+ case TLAN_TIMER_PHY_PDOWN:
+ tlan_phy_power_down(dev);
+ break;
+ case TLAN_TIMER_PHY_PUP:
+ tlan_phy_power_up(dev);
+ break;
+ case TLAN_TIMER_PHY_RESET:
+ tlan_phy_reset(dev);
+ break;
+ case TLAN_TIMER_PHY_START_LINK:
+ tlan_phy_start_link(dev);
+ break;
+ case TLAN_TIMER_PHY_FINISH_AN:
+ tlan_phy_finish_auto_neg(dev);
+ break;
+ case TLAN_TIMER_FINISH_RESET:
+ tlan_finish_reset(dev);
+ break;
+ case TLAN_TIMER_ACTIVITY:
+ spin_lock_irqsave(&priv->lock, flags);
+ if (priv->timer.function == NULL) {
+ elapsed = jiffies - priv->timer_set_at;
+ if (elapsed >= TLAN_TIMER_ACT_DELAY) {
+ tlan_dio_write8(dev->base_addr,
+ TLAN_LED_REG, TLAN_LED_LINK);
+ } else {
+ priv->timer.function = tlan_timer;
+ priv->timer.expires = priv->timer_set_at
+ + TLAN_TIMER_ACT_DELAY;
+ spin_unlock_irqrestore(&priv->lock, flags);
+ add_timer(&priv->timer);
+ break;
}
- spin_unlock_irqrestore(&priv->lock, flags);
- break;
- default:
- break;
+ }
+ spin_unlock_irqrestore(&priv->lock, flags);
+ break;
+ default:
+ break;
}
-} /* TLan_Timer */
+}
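The dispatch above fires for one-shot timers armed elsewhere in the driver via tlan_set_timer() (e.g. tlan_set_timer(dev, 10*HZ, TLAN_TIMER_FINISH_RESET) in the reset path below). The real arming routine is outside this hunk; a hedged sketch of what it does, using the renamed struct tlan_priv fields (tlan_set_timer_sketch is an illustrative name only):

	/* Sketch of the arming side: record the timer type and expiry,
	 * then hand the one-shot to the kernel; tlan_timer() above
	 * dispatches on timer_type when it fires. */
	static void tlan_set_timer_sketch(struct net_device *dev,
					  u32 ticks, u32 type)
	{
		struct tlan_priv *priv = netdev_priv(dev);
		unsigned long flags;

		spin_lock_irqsave(&priv->lock, flags);
		if (priv->timer.function == NULL) {
			priv->timer.function = tlan_timer;
			priv->timer.data = (unsigned long) dev;
			priv->timer.expires = jiffies + ticks;
			priv->timer_set_at = jiffies;
			priv->timer_type = type;
			add_timer(&priv->timer);
		}
		spin_unlock_irqrestore(&priv->lock, flags);
	}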
@@ -1919,39 +2007,39 @@
/*****************************************************************************
******************************************************************************
- ThunderLAN Driver Adapter Related Routines
+ThunderLAN driver adapter related routines
******************************************************************************
*****************************************************************************/
- /***************************************************************
- * TLan_ResetLists
- *
- * Returns:
- * Nothing
- * Parms:
- * dev The device structure with the list
- * stuctures to be reset.
- *
- * This routine sets the variables associated with managing
- * the TLAN lists to their initial values.
- *
- **************************************************************/
+/***************************************************************
+ * tlan_reset_lists
+ *
+ * Returns:
+ * Nothing
+ * Parms:
+ * dev The device structure with the list
+ * structures to be reset.
+ *
+ * This routine sets the variables associated with managing
+ * the TLAN lists to their initial values.
+ *
+ **************************************************************/
-static void TLan_ResetLists( struct net_device *dev )
+static void tlan_reset_lists(struct net_device *dev)
{
- TLanPrivateInfo *priv = netdev_priv(dev);
+ struct tlan_priv *priv = netdev_priv(dev);
int i;
- TLanList *list;
+ struct tlan_list *list;
dma_addr_t list_phys;
struct sk_buff *skb;
- priv->txHead = 0;
- priv->txTail = 0;
- for ( i = 0; i < TLAN_NUM_TX_LISTS; i++ ) {
- list = priv->txList + i;
- list->cStat = TLAN_CSTAT_UNUSED;
+ priv->tx_head = 0;
+ priv->tx_tail = 0;
+ for (i = 0; i < TLAN_NUM_TX_LISTS; i++) {
+ list = priv->tx_list + i;
+ list->c_stat = TLAN_CSTAT_UNUSED;
list->buffer[0].address = 0;
list->buffer[2].count = 0;
list->buffer[2].address = 0;
@@ -1959,169 +2047,169 @@
list->buffer[9].address = 0;
}
- priv->rxHead = 0;
- priv->rxTail = TLAN_NUM_RX_LISTS - 1;
- for ( i = 0; i < TLAN_NUM_RX_LISTS; i++ ) {
- list = priv->rxList + i;
- list_phys = priv->rxListDMA + sizeof(TLanList) * i;
- list->cStat = TLAN_CSTAT_READY;
- list->frameSize = TLAN_MAX_FRAME_SIZE;
+ priv->rx_head = 0;
+ priv->rx_tail = TLAN_NUM_RX_LISTS - 1;
+ for (i = 0; i < TLAN_NUM_RX_LISTS; i++) {
+ list = priv->rx_list + i;
+ list_phys = priv->rx_list_dma + sizeof(struct tlan_list)*i;
+ list->c_stat = TLAN_CSTAT_READY;
+ list->frame_size = TLAN_MAX_FRAME_SIZE;
list->buffer[0].count = TLAN_MAX_FRAME_SIZE | TLAN_LAST_BUFFER;
skb = netdev_alloc_skb_ip_align(dev, TLAN_MAX_FRAME_SIZE + 5);
- if ( !skb ) {
- pr_err("TLAN: out of memory for received data.\n" );
+ if (!skb) {
+ pr_err("TLAN: out of memory for received data.\n");
break;
}
- list->buffer[0].address = pci_map_single(priv->pciDev,
+ list->buffer[0].address = pci_map_single(priv->pci_dev,
skb->data,
TLAN_MAX_FRAME_SIZE,
PCI_DMA_FROMDEVICE);
- TLan_StoreSKB(list, skb);
+ tlan_store_skb(list, skb);
list->buffer[1].count = 0;
list->buffer[1].address = 0;
- list->forward = list_phys + sizeof(TLanList);
+ list->forward = list_phys + sizeof(struct tlan_list);
}
/* in case ran out of memory early, clear bits */
while (i < TLAN_NUM_RX_LISTS) {
- TLan_StoreSKB(priv->rxList + i, NULL);
+ tlan_store_skb(priv->rx_list + i, NULL);
++i;
}
list->forward = 0;
-} /* TLan_ResetLists */
+}
-static void TLan_FreeLists( struct net_device *dev )
+static void tlan_free_lists(struct net_device *dev)
{
- TLanPrivateInfo *priv = netdev_priv(dev);
+ struct tlan_priv *priv = netdev_priv(dev);
int i;
- TLanList *list;
+ struct tlan_list *list;
struct sk_buff *skb;
- for ( i = 0; i < TLAN_NUM_TX_LISTS; i++ ) {
- list = priv->txList + i;
- skb = TLan_GetSKB(list);
- if ( skb ) {
+ for (i = 0; i < TLAN_NUM_TX_LISTS; i++) {
+ list = priv->tx_list + i;
+ skb = tlan_get_skb(list);
+ if (skb) {
pci_unmap_single(
- priv->pciDev,
+ priv->pci_dev,
list->buffer[0].address,
max(skb->len,
(unsigned int)TLAN_MIN_FRAME_SIZE),
PCI_DMA_TODEVICE);
- dev_kfree_skb_any( skb );
+ dev_kfree_skb_any(skb);
list->buffer[8].address = 0;
list->buffer[9].address = 0;
}
}
- for ( i = 0; i < TLAN_NUM_RX_LISTS; i++ ) {
- list = priv->rxList + i;
- skb = TLan_GetSKB(list);
- if ( skb ) {
- pci_unmap_single(priv->pciDev,
+ for (i = 0; i < TLAN_NUM_RX_LISTS; i++) {
+ list = priv->rx_list + i;
+ skb = tlan_get_skb(list);
+ if (skb) {
+ pci_unmap_single(priv->pci_dev,
list->buffer[0].address,
TLAN_MAX_FRAME_SIZE,
PCI_DMA_FROMDEVICE);
- dev_kfree_skb_any( skb );
+ dev_kfree_skb_any(skb);
list->buffer[8].address = 0;
list->buffer[9].address = 0;
}
}
-} /* TLan_FreeLists */
+}
- /***************************************************************
- * TLan_PrintDio
- *
- * Returns:
- * Nothing
- * Parms:
- * io_base Base IO port of the device of
- * which to print DIO registers.
- *
- * This function prints out all the internal (DIO)
- * registers of a TLAN chip.
- *
- **************************************************************/
+/***************************************************************
+ * tlan_print_dio
+ *
+ * Returns:
+ * Nothing
+ * Parms:
+ * io_base Base IO port of the device of
+ * which to print DIO registers.
+ *
+ * This function prints out all the internal (DIO)
+ * registers of a TLAN chip.
+ *
+ **************************************************************/
-static void TLan_PrintDio( u16 io_base )
+static void tlan_print_dio(u16 io_base)
{
u32 data0, data1;
int i;
- printk( "TLAN: Contents of internal registers for io base 0x%04hx.\n",
- io_base );
- printk( "TLAN: Off. +0 +4\n" );
- for ( i = 0; i < 0x4C; i+= 8 ) {
- data0 = TLan_DioRead32( io_base, i );
- data1 = TLan_DioRead32( io_base, i + 0x4 );
- printk( "TLAN: 0x%02x 0x%08x 0x%08x\n", i, data0, data1 );
+ pr_info("TLAN: Contents of internal registers for io base 0x%04hx.\n",
+ io_base);
+ pr_info("TLAN: Off. +0 +4\n");
+ for (i = 0; i < 0x4C; i += 8) {
+ data0 = tlan_dio_read32(io_base, i);
+ data1 = tlan_dio_read32(io_base, i + 0x4);
+ pr_info("TLAN: 0x%02x 0x%08x 0x%08x\n", i, data0, data1);
}
-} /* TLan_PrintDio */
+}
- /***************************************************************
- * TLan_PrintList
- *
- * Returns:
- * Nothing
- * Parms:
- * list A pointer to the TLanList structure to
- * be printed.
- * type A string to designate type of list,
- * "Rx" or "Tx".
- * num The index of the list.
- *
- * This function prints out the contents of the list
- * pointed to by the list parameter.
- *
- **************************************************************/
+/***************************************************************
+ * tlan_print_list
+ *
+ * Returns:
+ * Nothing
+ * Parms:
+ * list A pointer to the struct tlan_list to
+ * be printed.
+ * type A string to designate type of list,
+ * "Rx" or "Tx".
+ * num The index of the list.
+ *
+ * This function prints out the contents of the list
+ * pointed to by the list parameter.
+ *
+ **************************************************************/
-static void TLan_PrintList( TLanList *list, char *type, int num)
+static void tlan_print_list(struct tlan_list *list, char *type, int num)
{
int i;
- printk( "TLAN: %s List %d at %p\n", type, num, list );
- printk( "TLAN: Forward = 0x%08x\n", list->forward );
- printk( "TLAN: CSTAT = 0x%04hx\n", list->cStat );
- printk( "TLAN: Frame Size = 0x%04hx\n", list->frameSize );
- /* for ( i = 0; i < 10; i++ ) { */
- for ( i = 0; i < 2; i++ ) {
- printk( "TLAN: Buffer[%d].count, addr = 0x%08x, 0x%08x\n",
- i, list->buffer[i].count, list->buffer[i].address );
+ pr_info("TLAN: %s List %d at %p\n", type, num, list);
+ pr_info("TLAN: Forward = 0x%08x\n", list->forward);
+ pr_info("TLAN: CSTAT = 0x%04hx\n", list->c_stat);
+ pr_info("TLAN: Frame Size = 0x%04hx\n", list->frame_size);
+ /* for (i = 0; i < 10; i++) { */
+ for (i = 0; i < 2; i++) {
+ pr_info("TLAN: Buffer[%d].count, addr = 0x%08x, 0x%08x\n",
+ i, list->buffer[i].count, list->buffer[i].address);
}
-} /* TLan_PrintList */
+}
- /***************************************************************
- * TLan_ReadAndClearStats
- *
- * Returns:
- * Nothing
- * Parms:
- * dev Pointer to device structure of adapter
- * to which to read stats.
- * record Flag indicating whether to add
- *
- * This functions reads all the internal status registers
- * of the TLAN chip, which clears them as a side effect.
- * It then either adds the values to the device's status
- * struct, or discards them, depending on whether record
- * is TLAN_RECORD (!=0) or TLAN_IGNORE (==0).
- *
- **************************************************************/
+/***************************************************************
+ * tlan_read_and_clear_stats
+ *
+ * Returns:
+ * Nothing
+ * Parms:
+ * dev Pointer to device structure of adapter
+ * to which to read stats.
+ * record Flag indicating whether to add the
+ * values read to the device's statistics.
+ *
+ * This function reads all the internal status registers
+ * of the TLAN chip, which clears them as a side effect.
+ * It then either adds the values to the device's status
+ * struct, or discards them, depending on whether record
+ * is TLAN_RECORD (!=0) or TLAN_IGNORE (==0).
+ *
+ **************************************************************/
-static void TLan_ReadAndClearStats( struct net_device *dev, int record )
+static void tlan_read_and_clear_stats(struct net_device *dev, int record)
{
u32 tx_good, tx_under;
u32 rx_good, rx_over;
@@ -2129,41 +2217,42 @@
u32 multi_col, single_col;
u32 excess_col, late_col, loss;
- outw( TLAN_GOOD_TX_FRMS, dev->base_addr + TLAN_DIO_ADR );
- tx_good = inb( dev->base_addr + TLAN_DIO_DATA );
- tx_good += inb( dev->base_addr + TLAN_DIO_DATA + 1 ) << 8;
- tx_good += inb( dev->base_addr + TLAN_DIO_DATA + 2 ) << 16;
- tx_under = inb( dev->base_addr + TLAN_DIO_DATA + 3 );
+ outw(TLAN_GOOD_TX_FRMS, dev->base_addr + TLAN_DIO_ADR);
+ tx_good = inb(dev->base_addr + TLAN_DIO_DATA);
+ tx_good += inb(dev->base_addr + TLAN_DIO_DATA + 1) << 8;
+ tx_good += inb(dev->base_addr + TLAN_DIO_DATA + 2) << 16;
+ tx_under = inb(dev->base_addr + TLAN_DIO_DATA + 3);
- outw( TLAN_GOOD_RX_FRMS, dev->base_addr + TLAN_DIO_ADR );
- rx_good = inb( dev->base_addr + TLAN_DIO_DATA );
- rx_good += inb( dev->base_addr + TLAN_DIO_DATA + 1 ) << 8;
- rx_good += inb( dev->base_addr + TLAN_DIO_DATA + 2 ) << 16;
- rx_over = inb( dev->base_addr + TLAN_DIO_DATA + 3 );
+ outw(TLAN_GOOD_RX_FRMS, dev->base_addr + TLAN_DIO_ADR);
+ rx_good = inb(dev->base_addr + TLAN_DIO_DATA);
+ rx_good += inb(dev->base_addr + TLAN_DIO_DATA + 1) << 8;
+ rx_good += inb(dev->base_addr + TLAN_DIO_DATA + 2) << 16;
+ rx_over = inb(dev->base_addr + TLAN_DIO_DATA + 3);
- outw( TLAN_DEFERRED_TX, dev->base_addr + TLAN_DIO_ADR );
- def_tx = inb( dev->base_addr + TLAN_DIO_DATA );
- def_tx += inb( dev->base_addr + TLAN_DIO_DATA + 1 ) << 8;
- crc = inb( dev->base_addr + TLAN_DIO_DATA + 2 );
- code = inb( dev->base_addr + TLAN_DIO_DATA + 3 );
+ outw(TLAN_DEFERRED_TX, dev->base_addr + TLAN_DIO_ADR);
+ def_tx = inb(dev->base_addr + TLAN_DIO_DATA);
+ def_tx += inb(dev->base_addr + TLAN_DIO_DATA + 1) << 8;
+ crc = inb(dev->base_addr + TLAN_DIO_DATA + 2);
+ code = inb(dev->base_addr + TLAN_DIO_DATA + 3);
- outw( TLAN_MULTICOL_FRMS, dev->base_addr + TLAN_DIO_ADR );
- multi_col = inb( dev->base_addr + TLAN_DIO_DATA );
- multi_col += inb( dev->base_addr + TLAN_DIO_DATA + 1 ) << 8;
- single_col = inb( dev->base_addr + TLAN_DIO_DATA + 2 );
- single_col += inb( dev->base_addr + TLAN_DIO_DATA + 3 ) << 8;
+ outw(TLAN_MULTICOL_FRMS, dev->base_addr + TLAN_DIO_ADR);
+ multi_col = inb(dev->base_addr + TLAN_DIO_DATA);
+ multi_col += inb(dev->base_addr + TLAN_DIO_DATA + 1) << 8;
+ single_col = inb(dev->base_addr + TLAN_DIO_DATA + 2);
+ single_col += inb(dev->base_addr + TLAN_DIO_DATA + 3) << 8;
- outw( TLAN_EXCESSCOL_FRMS, dev->base_addr + TLAN_DIO_ADR );
- excess_col = inb( dev->base_addr + TLAN_DIO_DATA );
- late_col = inb( dev->base_addr + TLAN_DIO_DATA + 1 );
- loss = inb( dev->base_addr + TLAN_DIO_DATA + 2 );
+ outw(TLAN_EXCESSCOL_FRMS, dev->base_addr + TLAN_DIO_ADR);
+ excess_col = inb(dev->base_addr + TLAN_DIO_DATA);
+ late_col = inb(dev->base_addr + TLAN_DIO_DATA + 1);
+ loss = inb(dev->base_addr + TLAN_DIO_DATA + 2);
- if ( record ) {
+ if (record) {
dev->stats.rx_packets += rx_good;
dev->stats.rx_errors += rx_over + crc + code;
dev->stats.tx_packets += tx_good;
dev->stats.tx_errors += tx_under + loss;
- dev->stats.collisions += multi_col + single_col + excess_col + late_col;
+ dev->stats.collisions += multi_col
+ + single_col + excess_col + late_col;
dev->stats.rx_over_errors += rx_over;
dev->stats.rx_crc_errors += crc;
@@ -2173,39 +2262,39 @@
dev->stats.tx_carrier_errors += loss;
}
-} /* TLan_ReadAndClearStats */
+}
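Each statistics register above is read through the DIO window: the register offset is latched into TLAN_DIO_ADR, then the four bytes are pulled one at a time from TLAN_DIO_DATA. A sketch of that pattern as a hypothetical helper (tlan_dio_read24 does not exist in the driver; it only names the idiom):

	/* Hypothetical helper showing the DIO access pattern used above:
	 * latch the register offset, then read a 24-bit little-endian
	 * counter plus one overflow/error byte from the data window. */
	static u32 tlan_dio_read24(u16 io_base, u16 reg, u8 *extra)
	{
		u32 val;

		outw(reg, io_base + TLAN_DIO_ADR);
		val  = inb(io_base + TLAN_DIO_DATA);
		val += inb(io_base + TLAN_DIO_DATA + 1) << 8;
		val += inb(io_base + TLAN_DIO_DATA + 2) << 16;
		*extra = inb(io_base + TLAN_DIO_DATA + 3);
		return val;
	}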
- /***************************************************************
- * TLan_Reset
- *
- * Returns:
- * 0
- * Parms:
- * dev Pointer to device structure of adapter
- * to be reset.
- *
- * This function resets the adapter and it's physical
- * device. See Chap. 3, pp. 9-10 of the "ThunderLAN
- * Programmer's Guide" for details. The routine tries to
- * implement what is detailed there, though adjustments
- * have been made.
- *
- **************************************************************/
+/***************************************************************
+ * tlan_reset_adapter
+ *
+ * Returns:
+ * 0
+ * Parms:
+ * dev Pointer to device structure of adapter
+ * to be reset.
+ *
+ * This function resets the adapter and its physical
+ * device. See Chap. 3, pp. 9-10 of the "ThunderLAN
+ * Programmer's Guide" for details. The routine tries to
+ * implement what is detailed there, though adjustments
+ * have been made.
+ *
+ **************************************************************/
static void
-TLan_ResetAdapter( struct net_device *dev )
+tlan_reset_adapter(struct net_device *dev)
{
- TLanPrivateInfo *priv = netdev_priv(dev);
+ struct tlan_priv *priv = netdev_priv(dev);
int i;
u32 addr;
u32 data;
u8 data8;
- priv->tlanFullDuplex = false;
- priv->phyOnline=0;
+ priv->tlan_full_duplex = false;
+ priv->phy_online = 0;
netif_carrier_off(dev);
/* 1. Assert reset bit. */
@@ -2216,7 +2305,7 @@
udelay(1000);
-/* 2. Turn off interrupts. ( Probably isn't necessary ) */
+/* 2. Turn off interrupts. (Probably isn't necessary) */
data = inl(dev->base_addr + TLAN_HOST_CMD);
data |= TLAN_HC_INT_OFF;
@@ -2224,207 +2313,208 @@
/* 3. Clear AREGs and HASHs. */
- for ( i = TLAN_AREG_0; i <= TLAN_HASH_2; i += 4 ) {
- TLan_DioWrite32( dev->base_addr, (u16) i, 0 );
- }
+ for (i = TLAN_AREG_0; i <= TLAN_HASH_2; i += 4)
+ tlan_dio_write32(dev->base_addr, (u16) i, 0);
/* 4. Setup NetConfig register. */
data = TLAN_NET_CFG_1FRAG | TLAN_NET_CFG_1CHAN | TLAN_NET_CFG_PHY_EN;
- TLan_DioWrite16( dev->base_addr, TLAN_NET_CONFIG, (u16) data );
+ tlan_dio_write16(dev->base_addr, TLAN_NET_CONFIG, (u16) data);
/* 5. Load Ld_Tmr and Ld_Thr in HOST_CMD. */
- outl( TLAN_HC_LD_TMR | 0x3f, dev->base_addr + TLAN_HOST_CMD );
- outl( TLAN_HC_LD_THR | 0x9, dev->base_addr + TLAN_HOST_CMD );
+ outl(TLAN_HC_LD_TMR | 0x3f, dev->base_addr + TLAN_HOST_CMD);
+ outl(TLAN_HC_LD_THR | 0x9, dev->base_addr + TLAN_HOST_CMD);
/* 6. Unreset the MII by setting NMRST (in NetSio) to 1. */
- outw( TLAN_NET_SIO, dev->base_addr + TLAN_DIO_ADR );
+ outw(TLAN_NET_SIO, dev->base_addr + TLAN_DIO_ADR);
addr = dev->base_addr + TLAN_DIO_DATA + TLAN_NET_SIO;
- TLan_SetBit( TLAN_NET_SIO_NMRST, addr );
+ tlan_set_bit(TLAN_NET_SIO_NMRST, addr);
/* 7. Setup the remaining registers. */
- if ( priv->tlanRev >= 0x30 ) {
+ if (priv->tlan_rev >= 0x30) {
data8 = TLAN_ID_TX_EOC | TLAN_ID_RX_EOC;
- TLan_DioWrite8( dev->base_addr, TLAN_INT_DIS, data8 );
+ tlan_dio_write8(dev->base_addr, TLAN_INT_DIS, data8);
}
- TLan_PhyDetect( dev );
+ tlan_phy_detect(dev);
data = TLAN_NET_CFG_1FRAG | TLAN_NET_CFG_1CHAN;
- if ( priv->adapter->flags & TLAN_ADAPTER_BIT_RATE_PHY ) {
+ if (priv->adapter->flags & TLAN_ADAPTER_BIT_RATE_PHY) {
data |= TLAN_NET_CFG_BIT;
- if ( priv->aui == 1 ) {
- TLan_DioWrite8( dev->base_addr, TLAN_ACOMMIT, 0x0a );
- } else if ( priv->duplex == TLAN_DUPLEX_FULL ) {
- TLan_DioWrite8( dev->base_addr, TLAN_ACOMMIT, 0x00 );
- priv->tlanFullDuplex = true;
+ if (priv->aui == 1) {
+ tlan_dio_write8(dev->base_addr, TLAN_ACOMMIT, 0x0a);
+ } else if (priv->duplex == TLAN_DUPLEX_FULL) {
+ tlan_dio_write8(dev->base_addr, TLAN_ACOMMIT, 0x00);
+ priv->tlan_full_duplex = true;
} else {
- TLan_DioWrite8( dev->base_addr, TLAN_ACOMMIT, 0x08 );
+ tlan_dio_write8(dev->base_addr, TLAN_ACOMMIT, 0x08);
}
}
- if ( priv->phyNum == 0 ) {
+ if (priv->phy_num == 0)
data |= TLAN_NET_CFG_PHY_EN;
- }
- TLan_DioWrite16( dev->base_addr, TLAN_NET_CONFIG, (u16) data );
+ tlan_dio_write16(dev->base_addr, TLAN_NET_CONFIG, (u16) data);
- if ( priv->adapter->flags & TLAN_ADAPTER_UNMANAGED_PHY ) {
- TLan_FinishReset( dev );
- } else {
- TLan_PhyPowerDown( dev );
- }
+ if (priv->adapter->flags & TLAN_ADAPTER_UNMANAGED_PHY)
+ tlan_finish_reset(dev);
+ else
+ tlan_phy_power_down(dev);
-} /* TLan_ResetAdapter */
+}
static void
-TLan_FinishReset( struct net_device *dev )
+tlan_finish_reset(struct net_device *dev)
{
- TLanPrivateInfo *priv = netdev_priv(dev);
+ struct tlan_priv *priv = netdev_priv(dev);
u8 data;
u32 phy;
u8 sio;
u16 status;
u16 partner;
u16 tlphy_ctl;
- u16 tlphy_par;
+ u16 tlphy_par;
u16 tlphy_id1, tlphy_id2;
- int i;
+ int i;
- phy = priv->phy[priv->phyNum];
+ phy = priv->phy[priv->phy_num];
data = TLAN_NET_CMD_NRESET | TLAN_NET_CMD_NWRAP;
- if ( priv->tlanFullDuplex ) {
+ if (priv->tlan_full_duplex)
data |= TLAN_NET_CMD_DUPLEX;
- }
- TLan_DioWrite8( dev->base_addr, TLAN_NET_CMD, data );
+ tlan_dio_write8(dev->base_addr, TLAN_NET_CMD, data);
data = TLAN_NET_MASK_MASK4 | TLAN_NET_MASK_MASK5;
- if ( priv->phyNum == 0 ) {
+ if (priv->phy_num == 0)
data |= TLAN_NET_MASK_MASK7;
- }
- TLan_DioWrite8( dev->base_addr, TLAN_NET_MASK, data );
- TLan_DioWrite16( dev->base_addr, TLAN_MAX_RX, ((1536)+7)&~7 );
- TLan_MiiReadReg( dev, phy, MII_GEN_ID_HI, &tlphy_id1 );
- TLan_MiiReadReg( dev, phy, MII_GEN_ID_LO, &tlphy_id2 );
+ tlan_dio_write8(dev->base_addr, TLAN_NET_MASK, data);
+ tlan_dio_write16(dev->base_addr, TLAN_MAX_RX, ((1536)+7)&~7);
+ tlan_mii_read_reg(dev, phy, MII_GEN_ID_HI, &tlphy_id1);
+ tlan_mii_read_reg(dev, phy, MII_GEN_ID_LO, &tlphy_id2);
- if ( ( priv->adapter->flags & TLAN_ADAPTER_UNMANAGED_PHY ) ||
- ( priv->aui ) ) {
+ if ((priv->adapter->flags & TLAN_ADAPTER_UNMANAGED_PHY) ||
+ (priv->aui)) {
status = MII_GS_LINK;
- printk( "TLAN: %s: Link forced.\n", dev->name );
+ pr_info("TLAN: %s: Link forced.\n", dev->name);
} else {
- TLan_MiiReadReg( dev, phy, MII_GEN_STS, &status );
- udelay( 1000 );
- TLan_MiiReadReg( dev, phy, MII_GEN_STS, &status );
- if ( (status & MII_GS_LINK) &&
- /* We only support link info on Nat.Sem. PHY's */
- (tlphy_id1 == NAT_SEM_ID1) &&
- (tlphy_id2 == NAT_SEM_ID2) ) {
- TLan_MiiReadReg( dev, phy, MII_AN_LPA, &partner );
- TLan_MiiReadReg( dev, phy, TLAN_TLPHY_PAR, &tlphy_par );
+ tlan_mii_read_reg(dev, phy, MII_GEN_STS, &status);
+ udelay(1000);
+ tlan_mii_read_reg(dev, phy, MII_GEN_STS, &status);
+ if ((status & MII_GS_LINK) &&
+ /* We only support link info on Nat.Sem. PHY's */
+ (tlphy_id1 == NAT_SEM_ID1) &&
+ (tlphy_id2 == NAT_SEM_ID2)) {
+ tlan_mii_read_reg(dev, phy, MII_AN_LPA, &partner);
+ tlan_mii_read_reg(dev, phy, TLAN_TLPHY_PAR, &tlphy_par);
- printk( "TLAN: %s: Link active with ", dev->name );
+ pr_info("TLAN: %s: Link active with ", dev->name);
if (!(tlphy_par & TLAN_PHY_AN_EN_STAT)) {
- printk( "forced 10%sMbps %s-Duplex\n",
- tlphy_par & TLAN_PHY_SPEED_100 ? "" : "0",
- tlphy_par & TLAN_PHY_DUPLEX_FULL ? "Full" : "Half");
+ pr_info("forced 10%sMbps %s-Duplex\n",
+ tlphy_par & TLAN_PHY_SPEED_100
+ ? "" : "0",
+ tlphy_par & TLAN_PHY_DUPLEX_FULL
+ ? "Full" : "Half");
} else {
- printk( "AutoNegotiation enabled, at 10%sMbps %s-Duplex\n",
- tlphy_par & TLAN_PHY_SPEED_100 ? "" : "0",
- tlphy_par & TLAN_PHY_DUPLEX_FULL ? "Full" : "Half");
- printk("TLAN: Partner capability: ");
- for (i = 5; i <= 10; i++)
- if (partner & (1<<i))
- printk("%s",media[i-5]);
+ pr_info("Autonegotiation enabled, at 10%sMbps %s-Duplex\n",
+ tlphy_par & TLAN_PHY_SPEED_100
+ ? "" : "0",
+ tlphy_par & TLAN_PHY_DUPLEX_FULL
+ ? "Full" : "Half");
+ pr_info("TLAN: Partner capability: ");
+ for (i = 5; i <= 10; i++)
+ if (partner & (1<<i))
+ printk("%s", media[i-5]);
printk("\n");
}
- TLan_DioWrite8( dev->base_addr, TLAN_LED_REG, TLAN_LED_LINK );
+ tlan_dio_write8(dev->base_addr, TLAN_LED_REG,
+ TLAN_LED_LINK);
#ifdef MONITOR
/* We have link beat..for now anyway */
- priv->link = 1;
- /*Enabling link beat monitoring */
- TLan_SetTimer( dev, (10*HZ), TLAN_TIMER_LINK_BEAT );
+ priv->link = 1;
+ /*Enabling link beat monitoring */
+ tlan_set_timer(dev, (10*HZ), TLAN_TIMER_LINK_BEAT);
#endif
} else if (status & MII_GS_LINK) {
- printk( "TLAN: %s: Link active\n", dev->name );
- TLan_DioWrite8( dev->base_addr, TLAN_LED_REG, TLAN_LED_LINK );
+ pr_info("TLAN: %s: Link active\n", dev->name);
+ tlan_dio_write8(dev->base_addr, TLAN_LED_REG,
+ TLAN_LED_LINK);
}
}
- if ( priv->phyNum == 0 ) {
- TLan_MiiReadReg( dev, phy, TLAN_TLPHY_CTL, &tlphy_ctl );
- tlphy_ctl |= TLAN_TC_INTEN;
- TLan_MiiWriteReg( dev, phy, TLAN_TLPHY_CTL, tlphy_ctl );
- sio = TLan_DioRead8( dev->base_addr, TLAN_NET_SIO );
- sio |= TLAN_NET_SIO_MINTEN;
- TLan_DioWrite8( dev->base_addr, TLAN_NET_SIO, sio );
+ if (priv->phy_num == 0) {
+ tlan_mii_read_reg(dev, phy, TLAN_TLPHY_CTL, &tlphy_ctl);
+ tlphy_ctl |= TLAN_TC_INTEN;
+ tlan_mii_write_reg(dev, phy, TLAN_TLPHY_CTL, tlphy_ctl);
+ sio = tlan_dio_read8(dev->base_addr, TLAN_NET_SIO);
+ sio |= TLAN_NET_SIO_MINTEN;
+ tlan_dio_write8(dev->base_addr, TLAN_NET_SIO, sio);
}
- if ( status & MII_GS_LINK ) {
- TLan_SetMac( dev, 0, dev->dev_addr );
- priv->phyOnline = 1;
- outb( ( TLAN_HC_INT_ON >> 8 ), dev->base_addr + TLAN_HOST_CMD + 1 );
- if ( debug >= 1 && debug != TLAN_DEBUG_PROBE ) {
- outb( ( TLAN_HC_REQ_INT >> 8 ), dev->base_addr + TLAN_HOST_CMD + 1 );
- }
- outl( priv->rxListDMA, dev->base_addr + TLAN_CH_PARM );
- outl( TLAN_HC_GO | TLAN_HC_RT, dev->base_addr + TLAN_HOST_CMD );
+ if (status & MII_GS_LINK) {
+ tlan_set_mac(dev, 0, dev->dev_addr);
+ priv->phy_online = 1;
+ outb((TLAN_HC_INT_ON >> 8), dev->base_addr + TLAN_HOST_CMD + 1);
+ if (debug >= 1 && debug != TLAN_DEBUG_PROBE)
+ outb((TLAN_HC_REQ_INT >> 8),
+ dev->base_addr + TLAN_HOST_CMD + 1);
+ outl(priv->rx_list_dma, dev->base_addr + TLAN_CH_PARM);
+ outl(TLAN_HC_GO | TLAN_HC_RT, dev->base_addr + TLAN_HOST_CMD);
netif_carrier_on(dev);
} else {
- printk( "TLAN: %s: Link inactive, will retry in 10 secs...\n",
- dev->name );
- TLan_SetTimer( dev, (10*HZ), TLAN_TIMER_FINISH_RESET );
+ pr_info("TLAN: %s: Link inactive, will retry in 10 secs...\n",
+ dev->name);
+ tlan_set_timer(dev, (10*HZ), TLAN_TIMER_FINISH_RESET);
return;
}
- TLan_SetMulticastList(dev);
+ tlan_set_multicast_list(dev);
-} /* TLan_FinishReset */
+}
- /***************************************************************
- * TLan_SetMac
- *
- * Returns:
- * Nothing
- * Parms:
- * dev Pointer to device structure of adapter
- * on which to change the AREG.
- * areg The AREG to set the address in (0 - 3).
- * mac A pointer to an array of chars. Each
- * element stores one byte of the address.
- * IE, it isn't in ascii.
- *
- * This function transfers a MAC address to one of the
- * TLAN AREGs (address registers). The TLAN chip locks
- * the register on writing to offset 0 and unlocks the
- * register after writing to offset 5. If NULL is passed
- * in mac, then the AREG is filled with 0's.
- *
- **************************************************************/
+/***************************************************************
+ * tlan_set_mac
+ *
+ * Returns:
+ * Nothing
+ * Parms:
+ * dev Pointer to device structure of adapter
+ * on which to change the AREG.
+ * areg The AREG to set the address in (0 - 3).
+ * mac A pointer to an array of chars. Each
+ * element stores one byte of the address.
+ * IE, it isn't in ascii.
+ *
+ * This function transfers a MAC address to one of the
+ * TLAN AREGs (address registers). The TLAN chip locks
+ * the register on writing to offset 0 and unlocks the
+ * register after writing to offset 5. If NULL is passed
+ * in mac, then the AREG is filled with 0's.
+ *
+ **************************************************************/
-static void TLan_SetMac( struct net_device *dev, int areg, char *mac )
+static void tlan_set_mac(struct net_device *dev, int areg, char *mac)
{
int i;
areg *= 6;
- if ( mac != NULL ) {
- for ( i = 0; i < 6; i++ )
- TLan_DioWrite8( dev->base_addr,
- TLAN_AREG_0 + areg + i, mac[i] );
+ if (mac != NULL) {
+ for (i = 0; i < 6; i++)
+ tlan_dio_write8(dev->base_addr,
+ TLAN_AREG_0 + areg + i, mac[i]);
} else {
- for ( i = 0; i < 6; i++ )
- TLan_DioWrite8( dev->base_addr,
- TLAN_AREG_0 + areg + i, 0 );
+ for (i = 0; i < 6; i++)
+ tlan_dio_write8(dev->base_addr,
+ TLAN_AREG_0 + areg + i, 0);
}
-} /* TLan_SetMac */
+}
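As used elsewhere in this patch, AREG 0 holds the station address while AREGs 1-3 hold the exact-match multicast slots. A usage sketch under that assumption (tlan_reload_aregs is an illustrative wrapper, not part of the patch):

	/* Usage sketch: reload the station address and clear the three
	 * exact-match multicast AREGs, as the reset path does. */
	static void tlan_reload_aregs(struct net_device *dev)
	{
		int i;

		tlan_set_mac(dev, 0, dev->dev_addr);	/* station address */
		for (i = 0; i < 3; i++)
			tlan_set_mac(dev, i + 1, NULL);	/* clear AREG_1..3 */
	}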
@@ -2432,205 +2522,202 @@
/*****************************************************************************
******************************************************************************
- ThunderLAN Driver PHY Layer Routines
+ThunderLAN driver PHY layer routines
******************************************************************************
*****************************************************************************/
- /*********************************************************************
- * TLan_PhyPrint
- *
- * Returns:
- * Nothing
- * Parms:
- * dev A pointer to the device structure of the
- * TLAN device having the PHYs to be detailed.
- *
- * This function prints the registers a PHY (aka transceiver).
- *
- ********************************************************************/
+/*********************************************************************
+ * tlan_phy_print
+ *
+ * Returns:
+ * Nothing
+ * Parms:
+ * dev A pointer to the device structure of the
+ * TLAN device having the PHYs to be detailed.
+ *
+ * This function prints the registers of a PHY (aka transceiver).
+ *
+ ********************************************************************/
-static void TLan_PhyPrint( struct net_device *dev )
+static void tlan_phy_print(struct net_device *dev)
{
- TLanPrivateInfo *priv = netdev_priv(dev);
+ struct tlan_priv *priv = netdev_priv(dev);
u16 i, data0, data1, data2, data3, phy;
- phy = priv->phy[priv->phyNum];
+ phy = priv->phy[priv->phy_num];
- if ( priv->adapter->flags & TLAN_ADAPTER_UNMANAGED_PHY ) {
- printk( "TLAN: Device %s, Unmanaged PHY.\n", dev->name );
- } else if ( phy <= TLAN_PHY_MAX_ADDR ) {
- printk( "TLAN: Device %s, PHY 0x%02x.\n", dev->name, phy );
- printk( "TLAN: Off. +0 +1 +2 +3\n" );
- for ( i = 0; i < 0x20; i+= 4 ) {
- printk( "TLAN: 0x%02x", i );
- TLan_MiiReadReg( dev, phy, i, &data0 );
- printk( " 0x%04hx", data0 );
- TLan_MiiReadReg( dev, phy, i + 1, &data1 );
- printk( " 0x%04hx", data1 );
- TLan_MiiReadReg( dev, phy, i + 2, &data2 );
- printk( " 0x%04hx", data2 );
- TLan_MiiReadReg( dev, phy, i + 3, &data3 );
- printk( " 0x%04hx\n", data3 );
+ if (priv->adapter->flags & TLAN_ADAPTER_UNMANAGED_PHY) {
+ pr_info("TLAN: Device %s, Unmanaged PHY.\n", dev->name);
+ } else if (phy <= TLAN_PHY_MAX_ADDR) {
+ pr_info("TLAN: Device %s, PHY 0x%02x.\n", dev->name, phy);
+ pr_info("TLAN: Off. +0 +1 +2 +3\n");
+ for (i = 0; i < 0x20; i += 4) {
+ pr_info("TLAN: 0x%02x", i);
+ tlan_mii_read_reg(dev, phy, i, &data0);
+ printk(" 0x%04hx", data0);
+ tlan_mii_read_reg(dev, phy, i + 1, &data1);
+ printk(" 0x%04hx", data1);
+ tlan_mii_read_reg(dev, phy, i + 2, &data2);
+ printk(" 0x%04hx", data2);
+ tlan_mii_read_reg(dev, phy, i + 3, &data3);
+ printk(" 0x%04hx\n", data3);
}
} else {
- printk( "TLAN: Device %s, Invalid PHY.\n", dev->name );
+ pr_info("TLAN: Device %s, Invalid PHY.\n", dev->name);
}
-} /* TLan_PhyPrint */
+}
- /*********************************************************************
- * TLan_PhyDetect
- *
- * Returns:
- * Nothing
- * Parms:
- * dev A pointer to the device structure of the adapter
- * for which the PHY needs determined.
- *
- * So far I've found that adapters which have external PHYs
- * may also use the internal PHY for part of the functionality.
- * (eg, AUI/Thinnet). This function finds out if this TLAN
- * chip has an internal PHY, and then finds the first external
- * PHY (starting from address 0) if it exists).
- *
- ********************************************************************/
+/*********************************************************************
+ * tlan_phy_detect
+ *
+ * Returns:
+ * Nothing
+ * Parms:
+ * dev A pointer to the device structure of the adapter
+ *			for which the PHY needs to be determined.
+ *
+ * So far I've found that adapters which have external PHYs
+ * may also use the internal PHY for part of the functionality.
+ *	(e.g., AUI/Thinnet).  This function finds out if this TLAN
+ *	chip has an internal PHY, and then finds the first external
+ *	PHY (starting from address 0), if one exists.
+ *
+ ********************************************************************/
-static void TLan_PhyDetect( struct net_device *dev )
+static void tlan_phy_detect(struct net_device *dev)
{
- TLanPrivateInfo *priv = netdev_priv(dev);
+ struct tlan_priv *priv = netdev_priv(dev);
u16 control;
u16 hi;
u16 lo;
u32 phy;
- if ( priv->adapter->flags & TLAN_ADAPTER_UNMANAGED_PHY ) {
- priv->phyNum = 0xFFFF;
+ if (priv->adapter->flags & TLAN_ADAPTER_UNMANAGED_PHY) {
+ priv->phy_num = 0xffff;
return;
}
- TLan_MiiReadReg( dev, TLAN_PHY_MAX_ADDR, MII_GEN_ID_HI, &hi );
+ tlan_mii_read_reg(dev, TLAN_PHY_MAX_ADDR, MII_GEN_ID_HI, &hi);
- if ( hi != 0xFFFF ) {
+ if (hi != 0xffff)
priv->phy[0] = TLAN_PHY_MAX_ADDR;
- } else {
+ else
priv->phy[0] = TLAN_PHY_NONE;
- }
priv->phy[1] = TLAN_PHY_NONE;
- for ( phy = 0; phy <= TLAN_PHY_MAX_ADDR; phy++ ) {
- TLan_MiiReadReg( dev, phy, MII_GEN_CTL, &control );
- TLan_MiiReadReg( dev, phy, MII_GEN_ID_HI, &hi );
- TLan_MiiReadReg( dev, phy, MII_GEN_ID_LO, &lo );
- if ( ( control != 0xFFFF ) ||
- ( hi != 0xFFFF ) || ( lo != 0xFFFF ) ) {
- TLAN_DBG( TLAN_DEBUG_GNRL,
- "PHY found at %02x %04x %04x %04x\n",
- phy, control, hi, lo );
- if ( ( priv->phy[1] == TLAN_PHY_NONE ) &&
- ( phy != TLAN_PHY_MAX_ADDR ) ) {
+ for (phy = 0; phy <= TLAN_PHY_MAX_ADDR; phy++) {
+ tlan_mii_read_reg(dev, phy, MII_GEN_CTL, &control);
+ tlan_mii_read_reg(dev, phy, MII_GEN_ID_HI, &hi);
+ tlan_mii_read_reg(dev, phy, MII_GEN_ID_LO, &lo);
+ if ((control != 0xffff) ||
+ (hi != 0xffff) || (lo != 0xffff)) {
+ TLAN_DBG(TLAN_DEBUG_GNRL,
+ "PHY found at %02x %04x %04x %04x\n",
+ phy, control, hi, lo);
+ if ((priv->phy[1] == TLAN_PHY_NONE) &&
+ (phy != TLAN_PHY_MAX_ADDR)) {
priv->phy[1] = phy;
}
}
}
- if ( priv->phy[1] != TLAN_PHY_NONE ) {
- priv->phyNum = 1;
- } else if ( priv->phy[0] != TLAN_PHY_NONE ) {
- priv->phyNum = 0;
- } else {
- printk( "TLAN: Cannot initialize device, no PHY was found!\n" );
- }
+ if (priv->phy[1] != TLAN_PHY_NONE)
+ priv->phy_num = 1;
+ else if (priv->phy[0] != TLAN_PHY_NONE)
+ priv->phy_num = 0;
+ else
+ pr_info("TLAN: Cannot initialize device, no PHY was found!\n");
-} /* TLan_PhyDetect */
+}
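The loop above relies on the MII convention that reads from an absent
PHY address float to all ones; the same presence test in isolation, as a
hedged sketch (the example_* name is not in the driver):

	/* A PHY is assumed present when any of its control/ID registers
	 * reads back something other than 0xffff.
	 */
	static bool example_phy_present(struct net_device *dev, u16 addr)
	{
		u16 control, id_hi, id_lo;

		tlan_mii_read_reg(dev, addr, MII_GEN_CTL, &control);
		tlan_mii_read_reg(dev, addr, MII_GEN_ID_HI, &id_hi);
		tlan_mii_read_reg(dev, addr, MII_GEN_ID_LO, &id_lo);
		return control != 0xffff || id_hi != 0xffff || id_lo != 0xffff;
	}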
-static void TLan_PhyPowerDown( struct net_device *dev )
+static void tlan_phy_power_down(struct net_device *dev)
{
- TLanPrivateInfo *priv = netdev_priv(dev);
+ struct tlan_priv *priv = netdev_priv(dev);
u16 value;
- TLAN_DBG( TLAN_DEBUG_GNRL, "%s: Powering down PHY(s).\n", dev->name );
+ TLAN_DBG(TLAN_DEBUG_GNRL, "%s: Powering down PHY(s).\n", dev->name);
value = MII_GC_PDOWN | MII_GC_LOOPBK | MII_GC_ISOLATE;
- TLan_MiiSync( dev->base_addr );
- TLan_MiiWriteReg( dev, priv->phy[priv->phyNum], MII_GEN_CTL, value );
- if ( ( priv->phyNum == 0 ) &&
- ( priv->phy[1] != TLAN_PHY_NONE ) &&
- ( ! ( priv->adapter->flags & TLAN_ADAPTER_USE_INTERN_10 ) ) ) {
- TLan_MiiSync( dev->base_addr );
- TLan_MiiWriteReg( dev, priv->phy[1], MII_GEN_CTL, value );
+ tlan_mii_sync(dev->base_addr);
+ tlan_mii_write_reg(dev, priv->phy[priv->phy_num], MII_GEN_CTL, value);
+ if ((priv->phy_num == 0) &&
+ (priv->phy[1] != TLAN_PHY_NONE) &&
+ (!(priv->adapter->flags & TLAN_ADAPTER_USE_INTERN_10))) {
+ tlan_mii_sync(dev->base_addr);
+ tlan_mii_write_reg(dev, priv->phy[1], MII_GEN_CTL, value);
}
/* Wait for 50 ms and powerup
	 * This is arbitrary.  It is intended to make sure the
* transceiver settles.
*/
- TLan_SetTimer( dev, (HZ/20), TLAN_TIMER_PHY_PUP );
+ tlan_set_timer(dev, (HZ/20), TLAN_TIMER_PHY_PUP);
-} /* TLan_PhyPowerDown */
+}
-static void TLan_PhyPowerUp( struct net_device *dev )
+static void tlan_phy_power_up(struct net_device *dev)
{
- TLanPrivateInfo *priv = netdev_priv(dev);
+ struct tlan_priv *priv = netdev_priv(dev);
u16 value;
- TLAN_DBG( TLAN_DEBUG_GNRL, "%s: Powering up PHY.\n", dev->name );
- TLan_MiiSync( dev->base_addr );
+ TLAN_DBG(TLAN_DEBUG_GNRL, "%s: Powering up PHY.\n", dev->name);
+ tlan_mii_sync(dev->base_addr);
value = MII_GC_LOOPBK;
- TLan_MiiWriteReg( dev, priv->phy[priv->phyNum], MII_GEN_CTL, value );
- TLan_MiiSync(dev->base_addr);
+ tlan_mii_write_reg(dev, priv->phy[priv->phy_num], MII_GEN_CTL, value);
+ tlan_mii_sync(dev->base_addr);
/* Wait for 500 ms and reset the
* transceiver. The TLAN docs say both 50 ms and
	 * 500 ms, so use the longer, just in case.
*/
- TLan_SetTimer( dev, (HZ/20), TLAN_TIMER_PHY_RESET );
+ tlan_set_timer(dev, (HZ/20), TLAN_TIMER_PHY_RESET);
-} /* TLan_PhyPowerUp */
+}
-static void TLan_PhyReset( struct net_device *dev )
+static void tlan_phy_reset(struct net_device *dev)
{
- TLanPrivateInfo *priv = netdev_priv(dev);
+ struct tlan_priv *priv = netdev_priv(dev);
u16 phy;
u16 value;
- phy = priv->phy[priv->phyNum];
+ phy = priv->phy[priv->phy_num];
- TLAN_DBG( TLAN_DEBUG_GNRL, "%s: Reseting PHY.\n", dev->name );
- TLan_MiiSync( dev->base_addr );
+ TLAN_DBG(TLAN_DEBUG_GNRL, "%s: Reseting PHY.\n", dev->name);
+ tlan_mii_sync(dev->base_addr);
value = MII_GC_LOOPBK | MII_GC_RESET;
- TLan_MiiWriteReg( dev, phy, MII_GEN_CTL, value );
- TLan_MiiReadReg( dev, phy, MII_GEN_CTL, &value );
- while ( value & MII_GC_RESET ) {
- TLan_MiiReadReg( dev, phy, MII_GEN_CTL, &value );
- }
+ tlan_mii_write_reg(dev, phy, MII_GEN_CTL, value);
+ tlan_mii_read_reg(dev, phy, MII_GEN_CTL, &value);
+ while (value & MII_GC_RESET)
+ tlan_mii_read_reg(dev, phy, MII_GEN_CTL, &value);
/* Wait for 500 ms and initialize.
* I don't remember why I wait this long.
* I've changed this to 50ms, as it seems long enough.
*/
- TLan_SetTimer( dev, (HZ/20), TLAN_TIMER_PHY_START_LINK );
+ tlan_set_timer(dev, (HZ/20), TLAN_TIMER_PHY_START_LINK);
-} /* TLan_PhyReset */
+}
-static void TLan_PhyStartLink( struct net_device *dev )
+static void tlan_phy_start_link(struct net_device *dev)
{
- TLanPrivateInfo *priv = netdev_priv(dev);
+ struct tlan_priv *priv = netdev_priv(dev);
u16 ability;
u16 control;
u16 data;
@@ -2638,86 +2725,88 @@
u16 status;
u16 tctl;
- phy = priv->phy[priv->phyNum];
- TLAN_DBG( TLAN_DEBUG_GNRL, "%s: Trying to activate link.\n", dev->name );
- TLan_MiiReadReg( dev, phy, MII_GEN_STS, &status );
- TLan_MiiReadReg( dev, phy, MII_GEN_STS, &ability );
+ phy = priv->phy[priv->phy_num];
+ TLAN_DBG(TLAN_DEBUG_GNRL, "%s: Trying to activate link.\n", dev->name);
+ tlan_mii_read_reg(dev, phy, MII_GEN_STS, &status);
+ tlan_mii_read_reg(dev, phy, MII_GEN_STS, &ability);
- if ( ( status & MII_GS_AUTONEG ) &&
- ( ! priv->aui ) ) {
+ if ((status & MII_GS_AUTONEG) &&
+ (!priv->aui)) {
ability = status >> 11;
- if ( priv->speed == TLAN_SPEED_10 &&
- priv->duplex == TLAN_DUPLEX_HALF) {
- TLan_MiiWriteReg( dev, phy, MII_GEN_CTL, 0x0000);
- } else if ( priv->speed == TLAN_SPEED_10 &&
- priv->duplex == TLAN_DUPLEX_FULL) {
- priv->tlanFullDuplex = true;
- TLan_MiiWriteReg( dev, phy, MII_GEN_CTL, 0x0100);
- } else if ( priv->speed == TLAN_SPEED_100 &&
- priv->duplex == TLAN_DUPLEX_HALF) {
- TLan_MiiWriteReg( dev, phy, MII_GEN_CTL, 0x2000);
- } else if ( priv->speed == TLAN_SPEED_100 &&
- priv->duplex == TLAN_DUPLEX_FULL) {
- priv->tlanFullDuplex = true;
- TLan_MiiWriteReg( dev, phy, MII_GEN_CTL, 0x2100);
+ if (priv->speed == TLAN_SPEED_10 &&
+ priv->duplex == TLAN_DUPLEX_HALF) {
+ tlan_mii_write_reg(dev, phy, MII_GEN_CTL, 0x0000);
+ } else if (priv->speed == TLAN_SPEED_10 &&
+ priv->duplex == TLAN_DUPLEX_FULL) {
+ priv->tlan_full_duplex = true;
+ tlan_mii_write_reg(dev, phy, MII_GEN_CTL, 0x0100);
+ } else if (priv->speed == TLAN_SPEED_100 &&
+ priv->duplex == TLAN_DUPLEX_HALF) {
+ tlan_mii_write_reg(dev, phy, MII_GEN_CTL, 0x2000);
+ } else if (priv->speed == TLAN_SPEED_100 &&
+ priv->duplex == TLAN_DUPLEX_FULL) {
+ priv->tlan_full_duplex = true;
+ tlan_mii_write_reg(dev, phy, MII_GEN_CTL, 0x2100);
} else {
/* Set Auto-Neg advertisement */
- TLan_MiiWriteReg( dev, phy, MII_AN_ADV, (ability << 5) | 1);
+ tlan_mii_write_reg(dev, phy, MII_AN_ADV,
+ (ability << 5) | 1);
			/* Enable Auto-Neg */
- TLan_MiiWriteReg( dev, phy, MII_GEN_CTL, 0x1000 );
+ tlan_mii_write_reg(dev, phy, MII_GEN_CTL, 0x1000);
/* Restart Auto-Neg */
- TLan_MiiWriteReg( dev, phy, MII_GEN_CTL, 0x1200 );
+ tlan_mii_write_reg(dev, phy, MII_GEN_CTL, 0x1200);
/* Wait for 4 sec for autonegotiation
- * to complete. The max spec time is less than this
- * but the card need additional time to start AN.
- * .5 sec should be plenty extra.
- */
- printk( "TLAN: %s: Starting autonegotiation.\n", dev->name );
- TLan_SetTimer( dev, (2*HZ), TLAN_TIMER_PHY_FINISH_AN );
+ * to complete. The max spec time is less than this
+			 * but the card needs additional time to start AN.
+ * .5 sec should be plenty extra.
+ */
+ pr_info("TLAN: %s: Starting autonegotiation.\n",
+ dev->name);
+ tlan_set_timer(dev, (2*HZ), TLAN_TIMER_PHY_FINISH_AN);
return;
}
}
- if ( ( priv->aui ) && ( priv->phyNum != 0 ) ) {
- priv->phyNum = 0;
- data = TLAN_NET_CFG_1FRAG | TLAN_NET_CFG_1CHAN | TLAN_NET_CFG_PHY_EN;
- TLan_DioWrite16( dev->base_addr, TLAN_NET_CONFIG, data );
- TLan_SetTimer( dev, (40*HZ/1000), TLAN_TIMER_PHY_PDOWN );
+ if ((priv->aui) && (priv->phy_num != 0)) {
+ priv->phy_num = 0;
+ data = TLAN_NET_CFG_1FRAG | TLAN_NET_CFG_1CHAN
+ | TLAN_NET_CFG_PHY_EN;
+ tlan_dio_write16(dev->base_addr, TLAN_NET_CONFIG, data);
+ tlan_set_timer(dev, (40*HZ/1000), TLAN_TIMER_PHY_PDOWN);
return;
- } else if ( priv->phyNum == 0 ) {
+ } else if (priv->phy_num == 0) {
control = 0;
- TLan_MiiReadReg( dev, phy, TLAN_TLPHY_CTL, &tctl );
- if ( priv->aui ) {
- tctl |= TLAN_TC_AUISEL;
+ tlan_mii_read_reg(dev, phy, TLAN_TLPHY_CTL, &tctl);
+ if (priv->aui) {
+ tctl |= TLAN_TC_AUISEL;
} else {
- tctl &= ~TLAN_TC_AUISEL;
- if ( priv->duplex == TLAN_DUPLEX_FULL ) {
+ tctl &= ~TLAN_TC_AUISEL;
+ if (priv->duplex == TLAN_DUPLEX_FULL) {
control |= MII_GC_DUPLEX;
- priv->tlanFullDuplex = true;
+ priv->tlan_full_duplex = true;
}
- if ( priv->speed == TLAN_SPEED_100 ) {
+ if (priv->speed == TLAN_SPEED_100)
control |= MII_GC_SPEEDSEL;
- }
}
- TLan_MiiWriteReg( dev, phy, MII_GEN_CTL, control );
- TLan_MiiWriteReg( dev, phy, TLAN_TLPHY_CTL, tctl );
+ tlan_mii_write_reg(dev, phy, MII_GEN_CTL, control);
+ tlan_mii_write_reg(dev, phy, TLAN_TLPHY_CTL, tctl);
}
/* Wait for 2 sec to give the transceiver time
* to establish link.
*/
- TLan_SetTimer( dev, (4*HZ), TLAN_TIMER_FINISH_RESET );
+ tlan_set_timer(dev, (4*HZ), TLAN_TIMER_FINISH_RESET);
-} /* TLan_PhyStartLink */
+}
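The bare MII_GEN_CTL values written above (0x0000/0x0100/0x2000/0x2100
for forced modes, 0x1000 and 0x1200 for autonegotiation) decompose into
the standard BMCR bits; a sketch with made-up EX_* names, since the
driver writes the constants directly:

	#define EX_BMCR_FULLDPLX	0x0100	/* duplex select */
	#define EX_BMCR_SPEED100	0x2000	/* speed select: 100 Mbit */
	#define EX_BMCR_ANENABLE	0x1000	/* 0x1000 = enable autoneg */
	#define EX_BMCR_ANRESTART	0x0200	/* 0x1200 = enable + restart */

	static u16 example_forced_bmcr(int speed100, int full_duplex)
	{
		u16 bmcr = 0;		/* 0x0000: 10 Mbit half duplex */

		if (speed100)
			bmcr |= EX_BMCR_SPEED100;
		if (full_duplex)
			bmcr |= EX_BMCR_FULLDPLX;
		return bmcr;		/* 0x0000, 0x0100, 0x2000 or 0x2100 */
	}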
-static void TLan_PhyFinishAutoNeg( struct net_device *dev )
+static void tlan_phy_finish_auto_neg(struct net_device *dev)
{
- TLanPrivateInfo *priv = netdev_priv(dev);
+ struct tlan_priv *priv = netdev_priv(dev);
u16 an_adv;
u16 an_lpa;
u16 data;
@@ -2725,115 +2814,118 @@
u16 phy;
u16 status;
- phy = priv->phy[priv->phyNum];
+ phy = priv->phy[priv->phy_num];
- TLan_MiiReadReg( dev, phy, MII_GEN_STS, &status );
- udelay( 1000 );
- TLan_MiiReadReg( dev, phy, MII_GEN_STS, &status );
+ tlan_mii_read_reg(dev, phy, MII_GEN_STS, &status);
+ udelay(1000);
+ tlan_mii_read_reg(dev, phy, MII_GEN_STS, &status);
- if ( ! ( status & MII_GS_AUTOCMPLT ) ) {
+ if (!(status & MII_GS_AUTOCMPLT)) {
/* Wait for 8 sec to give the process
* more time. Perhaps we should fail after a while.
*/
- if (!priv->neg_be_verbose++) {
- pr_info("TLAN: Giving autonegotiation more time.\n");
- pr_info("TLAN: Please check that your adapter has\n");
- pr_info("TLAN: been properly connected to a HUB or Switch.\n");
- pr_info("TLAN: Trying to establish link in the background...\n");
- }
- TLan_SetTimer( dev, (8*HZ), TLAN_TIMER_PHY_FINISH_AN );
+ if (!priv->neg_be_verbose++) {
+ pr_info("TLAN: Giving autonegotiation more time.\n");
+ pr_info("TLAN: Please check that your adapter has\n");
+ pr_info("TLAN: been properly connected to a HUB or Switch.\n");
+ pr_info("TLAN: Trying to establish link in the background...\n");
+ }
+ tlan_set_timer(dev, (8*HZ), TLAN_TIMER_PHY_FINISH_AN);
return;
}
- printk( "TLAN: %s: Autonegotiation complete.\n", dev->name );
- TLan_MiiReadReg( dev, phy, MII_AN_ADV, &an_adv );
- TLan_MiiReadReg( dev, phy, MII_AN_LPA, &an_lpa );
+ pr_info("TLAN: %s: Autonegotiation complete.\n", dev->name);
+ tlan_mii_read_reg(dev, phy, MII_AN_ADV, &an_adv);
+ tlan_mii_read_reg(dev, phy, MII_AN_LPA, &an_lpa);
mode = an_adv & an_lpa & 0x03E0;
- if ( mode & 0x0100 ) {
- priv->tlanFullDuplex = true;
- } else if ( ! ( mode & 0x0080 ) && ( mode & 0x0040 ) ) {
- priv->tlanFullDuplex = true;
- }
+ if (mode & 0x0100)
+ priv->tlan_full_duplex = true;
+ else if (!(mode & 0x0080) && (mode & 0x0040))
+ priv->tlan_full_duplex = true;
- if ( ( ! ( mode & 0x0180 ) ) &&
- ( priv->adapter->flags & TLAN_ADAPTER_USE_INTERN_10 ) &&
- ( priv->phyNum != 0 ) ) {
- priv->phyNum = 0;
- data = TLAN_NET_CFG_1FRAG | TLAN_NET_CFG_1CHAN | TLAN_NET_CFG_PHY_EN;
- TLan_DioWrite16( dev->base_addr, TLAN_NET_CONFIG, data );
- TLan_SetTimer( dev, (400*HZ/1000), TLAN_TIMER_PHY_PDOWN );
+ if ((!(mode & 0x0180)) &&
+ (priv->adapter->flags & TLAN_ADAPTER_USE_INTERN_10) &&
+ (priv->phy_num != 0)) {
+ priv->phy_num = 0;
+ data = TLAN_NET_CFG_1FRAG | TLAN_NET_CFG_1CHAN
+ | TLAN_NET_CFG_PHY_EN;
+ tlan_dio_write16(dev->base_addr, TLAN_NET_CONFIG, data);
+ tlan_set_timer(dev, (400*HZ/1000), TLAN_TIMER_PHY_PDOWN);
return;
}
- if ( priv->phyNum == 0 ) {
- if ( ( priv->duplex == TLAN_DUPLEX_FULL ) ||
- ( an_adv & an_lpa & 0x0040 ) ) {
- TLan_MiiWriteReg( dev, phy, MII_GEN_CTL,
- MII_GC_AUTOENB | MII_GC_DUPLEX );
- pr_info("TLAN: Starting internal PHY with FULL-DUPLEX\n" );
+ if (priv->phy_num == 0) {
+ if ((priv->duplex == TLAN_DUPLEX_FULL) ||
+ (an_adv & an_lpa & 0x0040)) {
+ tlan_mii_write_reg(dev, phy, MII_GEN_CTL,
+ MII_GC_AUTOENB | MII_GC_DUPLEX);
+ pr_info("TLAN: Starting internal PHY with FULL-DUPLEX\n");
} else {
- TLan_MiiWriteReg( dev, phy, MII_GEN_CTL, MII_GC_AUTOENB );
- pr_info( "TLAN: Starting internal PHY with HALF-DUPLEX\n" );
+ tlan_mii_write_reg(dev, phy, MII_GEN_CTL,
+ MII_GC_AUTOENB);
+ pr_info("TLAN: Starting internal PHY with HALF-DUPLEX\n");
}
}
	/* Wait for 100 ms.  No reason in particular.
*/
- TLan_SetTimer( dev, (HZ/10), TLAN_TIMER_FINISH_RESET );
+ tlan_set_timer(dev, (HZ/10), TLAN_TIMER_FINISH_RESET);
-} /* TLan_PhyFinishAutoNeg */
+}
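The duplex decision above works on the technology-ability bits common to
MII registers 4 and 5; a hedged restatement of just that logic (the
example_* name is not in the driver):

	/* Common abilities = our advertisement ANDed with the partner's.
	 * 0x0100 = 100Base-TX FD, 0x0080 = 100Base-TX HD,
	 * 0x0040 = 10Base-T FD,   0x0020 = 10Base-T HD.
	 */
	static bool example_negotiated_full_duplex(u16 an_adv, u16 an_lpa)
	{
		u16 mode = an_adv & an_lpa & 0x03e0;

		if (mode & 0x0100)	/* 100 Mbit full duplex wins */
			return true;
		/* 10 Mbit full duplex, unless 100 Mbit half is also common */
		return !(mode & 0x0080) && (mode & 0x0040);
	}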
#ifdef MONITOR
- /*********************************************************************
- *
- * TLan_phyMonitor
- *
- * Returns:
- * None
- *
- * Params:
- * dev The device structure of this device.
- *
- *
- * This function monitors PHY condition by reading the status
- * register via the MII bus. This can be used to give info
- * about link changes (up/down), and possible switch to alternate
- * media.
- *
- * ******************************************************************/
+/*********************************************************************
+ *
+ * tlan_phy_monitor
+ *
+ * Returns:
+ * None
+ *
+ * Params:
+ * dev The device structure of this device.
+ *
+ *
+ * This function monitors PHY condition by reading the status
+ * register via the MII bus. This can be used to give info
+ * about link changes (up/down), and possible switch to alternate
+ * media.
+ *
+ *******************************************************************/
-void TLan_PhyMonitor( struct net_device *dev )
+void tlan_phy_monitor(struct net_device *dev)
{
- TLanPrivateInfo *priv = netdev_priv(dev);
+ struct tlan_priv *priv = netdev_priv(dev);
u16 phy;
u16 phy_status;
- phy = priv->phy[priv->phyNum];
+ phy = priv->phy[priv->phy_num];
- /* Get PHY status register */
- TLan_MiiReadReg( dev, phy, MII_GEN_STS, &phy_status );
+ /* Get PHY status register */
+ tlan_mii_read_reg(dev, phy, MII_GEN_STS, &phy_status);
- /* Check if link has been lost */
- if (!(phy_status & MII_GS_LINK)) {
- if (priv->link) {
- priv->link = 0;
- printk(KERN_DEBUG "TLAN: %s has lost link\n", dev->name);
- netif_carrier_off(dev);
- TLan_SetTimer( dev, (2*HZ), TLAN_TIMER_LINK_BEAT );
- return;
+ /* Check if link has been lost */
+ if (!(phy_status & MII_GS_LINK)) {
+ if (priv->link) {
+ priv->link = 0;
+ printk(KERN_DEBUG "TLAN: %s has lost link\n",
+ dev->name);
+ netif_carrier_off(dev);
+ tlan_set_timer(dev, (2*HZ), TLAN_TIMER_LINK_BEAT);
+ return;
}
}
- /* Link restablished? */
- if ((phy_status & MII_GS_LINK) && !priv->link) {
- priv->link = 1;
- printk(KERN_DEBUG "TLAN: %s has reestablished link\n", dev->name);
+	/* Link reestablished? */
+ if ((phy_status & MII_GS_LINK) && !priv->link) {
+ priv->link = 1;
+ printk(KERN_DEBUG "TLAN: %s has reestablished link\n",
+ dev->name);
netif_carrier_on(dev);
- }
+ }
	/* Set up a new monitor */
- TLan_SetTimer( dev, (2*HZ), TLAN_TIMER_LINK_BEAT );
+ tlan_set_timer(dev, (2*HZ), TLAN_TIMER_LINK_BEAT);
}
#endif /* MONITOR */
@@ -2842,47 +2934,48 @@
/*****************************************************************************
******************************************************************************
- ThunderLAN Driver MII Routines
+ThunderLAN driver MII routines
- These routines are based on the information in Chap. 2 of the
- "ThunderLAN Programmer's Guide", pp. 15-24.
+these routines are based on the information in chap. 2 of the
+"ThunderLAN Programmer's Guide", pp. 15-24.
******************************************************************************
*****************************************************************************/
- /***************************************************************
- * TLan_MiiReadReg
- *
- * Returns:
- * false if ack received ok
- * true if no ack received or other error
- *
- * Parms:
- * dev The device structure containing
- * The io address and interrupt count
- * for this device.
- * phy The address of the PHY to be queried.
- * reg The register whose contents are to be
- * retrieved.
- * val A pointer to a variable to store the
- * retrieved value.
- *
- * This function uses the TLAN's MII bus to retrieve the contents
- * of a given register on a PHY. It sends the appropriate info
- * and then reads the 16-bit register value from the MII bus via
- * the TLAN SIO register.
- *
- **************************************************************/
+/***************************************************************
+ * tlan_mii_read_reg
+ *
+ * Returns:
+ * false if ack received ok
+ * true if no ack received or other error
+ *
+ * Parms:
+ * dev The device structure containing
+ *				the I/O address and interrupt count
+ * for this device.
+ * phy The address of the PHY to be queried.
+ * reg The register whose contents are to be
+ * retrieved.
+ * val A pointer to a variable to store the
+ * retrieved value.
+ *
+ * This function uses the TLAN's MII bus to retrieve the contents
+ * of a given register on a PHY. It sends the appropriate info
+ * and then reads the 16-bit register value from the MII bus via
+ * the TLAN SIO register.
+ *
+ **************************************************************/
-static bool TLan_MiiReadReg( struct net_device *dev, u16 phy, u16 reg, u16 *val )
+static bool
+tlan_mii_read_reg(struct net_device *dev, u16 phy, u16 reg, u16 *val)
{
u8 nack;
u16 sio, tmp;
- u32 i;
+ u32 i;
bool err;
int minten;
- TLanPrivateInfo *priv = netdev_priv(dev);
+ struct tlan_priv *priv = netdev_priv(dev);
unsigned long flags = 0;
err = false;
@@ -2892,48 +2985,48 @@
if (!in_irq())
spin_lock_irqsave(&priv->lock, flags);
- TLan_MiiSync(dev->base_addr);
+ tlan_mii_sync(dev->base_addr);
- minten = TLan_GetBit( TLAN_NET_SIO_MINTEN, sio );
- if ( minten )
- TLan_ClearBit(TLAN_NET_SIO_MINTEN, sio);
+ minten = tlan_get_bit(TLAN_NET_SIO_MINTEN, sio);
+ if (minten)
+ tlan_clear_bit(TLAN_NET_SIO_MINTEN, sio);
- TLan_MiiSendData( dev->base_addr, 0x1, 2 ); /* Start ( 01b ) */
- TLan_MiiSendData( dev->base_addr, 0x2, 2 ); /* Read ( 10b ) */
- TLan_MiiSendData( dev->base_addr, phy, 5 ); /* Device # */
- TLan_MiiSendData( dev->base_addr, reg, 5 ); /* Register # */
+ tlan_mii_send_data(dev->base_addr, 0x1, 2); /* start (01b) */
+ tlan_mii_send_data(dev->base_addr, 0x2, 2); /* read (10b) */
+ tlan_mii_send_data(dev->base_addr, phy, 5); /* device # */
+ tlan_mii_send_data(dev->base_addr, reg, 5); /* register # */
- TLan_ClearBit(TLAN_NET_SIO_MTXEN, sio); /* Change direction */
+ tlan_clear_bit(TLAN_NET_SIO_MTXEN, sio); /* change direction */
- TLan_ClearBit(TLAN_NET_SIO_MCLK, sio); /* Clock Idle bit */
- TLan_SetBit(TLAN_NET_SIO_MCLK, sio);
- TLan_ClearBit(TLAN_NET_SIO_MCLK, sio); /* Wait 300ns */
+ tlan_clear_bit(TLAN_NET_SIO_MCLK, sio); /* clock idle bit */
+ tlan_set_bit(TLAN_NET_SIO_MCLK, sio);
+ tlan_clear_bit(TLAN_NET_SIO_MCLK, sio); /* wait 300ns */
- nack = TLan_GetBit(TLAN_NET_SIO_MDATA, sio); /* Check for ACK */
- TLan_SetBit(TLAN_NET_SIO_MCLK, sio); /* Finish ACK */
- if (nack) { /* No ACK, so fake it */
+ nack = tlan_get_bit(TLAN_NET_SIO_MDATA, sio); /* check for ACK */
+ tlan_set_bit(TLAN_NET_SIO_MCLK, sio); /* finish ACK */
+ if (nack) { /* no ACK, so fake it */
for (i = 0; i < 16; i++) {
- TLan_ClearBit(TLAN_NET_SIO_MCLK, sio);
- TLan_SetBit(TLAN_NET_SIO_MCLK, sio);
+ tlan_clear_bit(TLAN_NET_SIO_MCLK, sio);
+ tlan_set_bit(TLAN_NET_SIO_MCLK, sio);
}
tmp = 0xffff;
err = true;
} else { /* ACK, so read data */
for (tmp = 0, i = 0x8000; i; i >>= 1) {
- TLan_ClearBit(TLAN_NET_SIO_MCLK, sio);
- if (TLan_GetBit(TLAN_NET_SIO_MDATA, sio))
+ tlan_clear_bit(TLAN_NET_SIO_MCLK, sio);
+ if (tlan_get_bit(TLAN_NET_SIO_MDATA, sio))
tmp |= i;
- TLan_SetBit(TLAN_NET_SIO_MCLK, sio);
+ tlan_set_bit(TLAN_NET_SIO_MCLK, sio);
}
}
- TLan_ClearBit(TLAN_NET_SIO_MCLK, sio); /* Idle cycle */
- TLan_SetBit(TLAN_NET_SIO_MCLK, sio);
+ tlan_clear_bit(TLAN_NET_SIO_MCLK, sio); /* idle cycle */
+ tlan_set_bit(TLAN_NET_SIO_MCLK, sio);
- if ( minten )
- TLan_SetBit(TLAN_NET_SIO_MINTEN, sio);
+ if (minten)
+ tlan_set_bit(TLAN_NET_SIO_MINTEN, sio);
*val = tmp;
@@ -2942,116 +3035,117 @@
return err;
-} /* TLan_MiiReadReg */
+}
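From a caller's point of view this is an ordinary clause-22 MDIO read; a
small sketch that polls link state with it. The double read is needed
because the MII link bit is latched low; the example_* name is
illustrative:

	static bool example_link_up(struct net_device *dev, u16 phy)
	{
		u16 status;

		tlan_mii_read_reg(dev, phy, MII_GEN_STS, &status); /* clear latch */
		tlan_mii_read_reg(dev, phy, MII_GEN_STS, &status); /* current state */
		return (status & MII_GS_LINK) != 0;
	}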
- /***************************************************************
- * TLan_MiiSendData
- *
- * Returns:
- * Nothing
- * Parms:
- * base_port The base IO port of the adapter in
- * question.
- * dev The address of the PHY to be queried.
- * data The value to be placed on the MII bus.
- * num_bits The number of bits in data that are to
- * be placed on the MII bus.
- *
- * This function sends on sequence of bits on the MII
- * configuration bus.
- *
- **************************************************************/
+/***************************************************************
+ * tlan_mii_send_data
+ *
+ * Returns:
+ * Nothing
+ * Parms:
+ * base_port The base IO port of the adapter in
+ * question.
+ * data The value to be placed on the MII bus.
+ * num_bits The number of bits in data that are to
+ * be placed on the MII bus.
+ *
+ *	This function sends one sequence of bits on the MII
+ * configuration bus.
+ *
+ **************************************************************/
-static void TLan_MiiSendData( u16 base_port, u32 data, unsigned num_bits )
+static void tlan_mii_send_data(u16 base_port, u32 data, unsigned num_bits)
{
u16 sio;
u32 i;
- if ( num_bits == 0 )
+ if (num_bits == 0)
return;
- outw( TLAN_NET_SIO, base_port + TLAN_DIO_ADR );
+ outw(TLAN_NET_SIO, base_port + TLAN_DIO_ADR);
sio = base_port + TLAN_DIO_DATA + TLAN_NET_SIO;
- TLan_SetBit( TLAN_NET_SIO_MTXEN, sio );
+ tlan_set_bit(TLAN_NET_SIO_MTXEN, sio);
- for ( i = ( 0x1 << ( num_bits - 1 ) ); i; i >>= 1 ) {
- TLan_ClearBit( TLAN_NET_SIO_MCLK, sio );
- (void) TLan_GetBit( TLAN_NET_SIO_MCLK, sio );
- if ( data & i )
- TLan_SetBit( TLAN_NET_SIO_MDATA, sio );
+ for (i = (0x1 << (num_bits - 1)); i; i >>= 1) {
+ tlan_clear_bit(TLAN_NET_SIO_MCLK, sio);
+ (void) tlan_get_bit(TLAN_NET_SIO_MCLK, sio);
+ if (data & i)
+ tlan_set_bit(TLAN_NET_SIO_MDATA, sio);
else
- TLan_ClearBit( TLAN_NET_SIO_MDATA, sio );
- TLan_SetBit( TLAN_NET_SIO_MCLK, sio );
- (void) TLan_GetBit( TLAN_NET_SIO_MCLK, sio );
+ tlan_clear_bit(TLAN_NET_SIO_MDATA, sio);
+ tlan_set_bit(TLAN_NET_SIO_MCLK, sio);
+ (void) tlan_get_bit(TLAN_NET_SIO_MCLK, sio);
}
-} /* TLan_MiiSendData */
+}
- /***************************************************************
- * TLan_MiiSync
- *
- * Returns:
- * Nothing
- * Parms:
- * base_port The base IO port of the adapter in
- * question.
- *
- * This functions syncs all PHYs in terms of the MII configuration
- * bus.
- *
- **************************************************************/
+/***************************************************************
+ *	tlan_mii_sync
+ *
+ * Returns:
+ * Nothing
+ * Parms:
+ * base_port The base IO port of the adapter in
+ * question.
+ *
+ *	This function syncs all PHYs in terms of the MII configuration
+ * bus.
+ *
+ **************************************************************/
-static void TLan_MiiSync( u16 base_port )
+static void tlan_mii_sync(u16 base_port)
{
int i;
u16 sio;
- outw( TLAN_NET_SIO, base_port + TLAN_DIO_ADR );
+ outw(TLAN_NET_SIO, base_port + TLAN_DIO_ADR);
sio = base_port + TLAN_DIO_DATA + TLAN_NET_SIO;
- TLan_ClearBit( TLAN_NET_SIO_MTXEN, sio );
- for ( i = 0; i < 32; i++ ) {
- TLan_ClearBit( TLAN_NET_SIO_MCLK, sio );
- TLan_SetBit( TLAN_NET_SIO_MCLK, sio );
+ tlan_clear_bit(TLAN_NET_SIO_MTXEN, sio);
+ for (i = 0; i < 32; i++) {
+ tlan_clear_bit(TLAN_NET_SIO_MCLK, sio);
+ tlan_set_bit(TLAN_NET_SIO_MCLK, sio);
}
-} /* TLan_MiiSync */
+}
- /***************************************************************
- * TLan_MiiWriteReg
- *
- * Returns:
- * Nothing
- * Parms:
- * dev The device structure for the device
- * to write to.
- * phy The address of the PHY to be written to.
- * reg The register whose contents are to be
- * written.
- * val The value to be written to the register.
- *
- * This function uses the TLAN's MII bus to write the contents of a
- * given register on a PHY. It sends the appropriate info and then
- * writes the 16-bit register value from the MII configuration bus
- * via the TLAN SIO register.
- *
- **************************************************************/
+/***************************************************************
+ * tlan_mii_write_reg
+ *
+ * Returns:
+ * Nothing
+ * Parms:
+ * dev The device structure for the device
+ * to write to.
+ * phy The address of the PHY to be written to.
+ * reg The register whose contents are to be
+ * written.
+ * val The value to be written to the register.
+ *
+ * This function uses the TLAN's MII bus to write the contents of a
+ * given register on a PHY. It sends the appropriate info and then
+ * writes the 16-bit register value from the MII configuration bus
+ * via the TLAN SIO register.
+ *
+ **************************************************************/
-static void TLan_MiiWriteReg( struct net_device *dev, u16 phy, u16 reg, u16 val )
+static void
+tlan_mii_write_reg(struct net_device *dev, u16 phy, u16 reg, u16 val)
{
u16 sio;
int minten;
unsigned long flags = 0;
- TLanPrivateInfo *priv = netdev_priv(dev);
+ struct tlan_priv *priv = netdev_priv(dev);
outw(TLAN_NET_SIO, dev->base_addr + TLAN_DIO_ADR);
sio = dev->base_addr + TLAN_DIO_DATA + TLAN_NET_SIO;
@@ -3059,30 +3153,30 @@
if (!in_irq())
spin_lock_irqsave(&priv->lock, flags);
- TLan_MiiSync( dev->base_addr );
+ tlan_mii_sync(dev->base_addr);
- minten = TLan_GetBit( TLAN_NET_SIO_MINTEN, sio );
- if ( minten )
- TLan_ClearBit( TLAN_NET_SIO_MINTEN, sio );
+ minten = tlan_get_bit(TLAN_NET_SIO_MINTEN, sio);
+ if (minten)
+ tlan_clear_bit(TLAN_NET_SIO_MINTEN, sio);
- TLan_MiiSendData( dev->base_addr, 0x1, 2 ); /* Start ( 01b ) */
- TLan_MiiSendData( dev->base_addr, 0x1, 2 ); /* Write ( 01b ) */
- TLan_MiiSendData( dev->base_addr, phy, 5 ); /* Device # */
- TLan_MiiSendData( dev->base_addr, reg, 5 ); /* Register # */
+ tlan_mii_send_data(dev->base_addr, 0x1, 2); /* start (01b) */
+ tlan_mii_send_data(dev->base_addr, 0x1, 2); /* write (01b) */
+ tlan_mii_send_data(dev->base_addr, phy, 5); /* device # */
+ tlan_mii_send_data(dev->base_addr, reg, 5); /* register # */
- TLan_MiiSendData( dev->base_addr, 0x2, 2 ); /* Send ACK */
- TLan_MiiSendData( dev->base_addr, val, 16 ); /* Send Data */
+ tlan_mii_send_data(dev->base_addr, 0x2, 2); /* send ACK */
+ tlan_mii_send_data(dev->base_addr, val, 16); /* send data */
- TLan_ClearBit( TLAN_NET_SIO_MCLK, sio ); /* Idle cycle */
- TLan_SetBit( TLAN_NET_SIO_MCLK, sio );
+ tlan_clear_bit(TLAN_NET_SIO_MCLK, sio); /* idle cycle */
+ tlan_set_bit(TLAN_NET_SIO_MCLK, sio);
- if ( minten )
- TLan_SetBit( TLAN_NET_SIO_MINTEN, sio );
+ if (minten)
+ tlan_set_bit(TLAN_NET_SIO_MINTEN, sio);
if (!in_irq())
spin_unlock_irqrestore(&priv->lock, flags);
-} /* TLan_MiiWriteReg */
+}
@@ -3090,229 +3184,226 @@
/*****************************************************************************
******************************************************************************
- ThunderLAN Driver Eeprom routines
+ThunderLAN driver eeprom routines
- The Compaq Netelligent 10 and 10/100 cards use a Microchip 24C02A
- EEPROM. These functions are based on information in Microchip's
- data sheet. I don't know how well this functions will work with
- other EEPROMs.
+the Compaq Netelligent 10 and 10/100 cards use a Microchip 24C02A
+EEPROM.  these functions are based on information in Microchip's
+data sheet.  I don't know how well these functions will work with
+other EEPROMs.
******************************************************************************
*****************************************************************************/
- /***************************************************************
- * TLan_EeSendStart
- *
- * Returns:
- * Nothing
- * Parms:
- * io_base The IO port base address for the
- * TLAN device with the EEPROM to
- * use.
- *
- * This function sends a start cycle to an EEPROM attached
- * to a TLAN chip.
- *
- **************************************************************/
+/***************************************************************
+ * tlan_ee_send_start
+ *
+ * Returns:
+ * Nothing
+ * Parms:
+ * io_base The IO port base address for the
+ * TLAN device with the EEPROM to
+ * use.
+ *
+ * This function sends a start cycle to an EEPROM attached
+ * to a TLAN chip.
+ *
+ **************************************************************/
-static void TLan_EeSendStart( u16 io_base )
+static void tlan_ee_send_start(u16 io_base)
{
u16 sio;
- outw( TLAN_NET_SIO, io_base + TLAN_DIO_ADR );
+ outw(TLAN_NET_SIO, io_base + TLAN_DIO_ADR);
sio = io_base + TLAN_DIO_DATA + TLAN_NET_SIO;
- TLan_SetBit( TLAN_NET_SIO_ECLOK, sio );
- TLan_SetBit( TLAN_NET_SIO_EDATA, sio );
- TLan_SetBit( TLAN_NET_SIO_ETXEN, sio );
- TLan_ClearBit( TLAN_NET_SIO_EDATA, sio );
- TLan_ClearBit( TLAN_NET_SIO_ECLOK, sio );
+ tlan_set_bit(TLAN_NET_SIO_ECLOK, sio);
+ tlan_set_bit(TLAN_NET_SIO_EDATA, sio);
+ tlan_set_bit(TLAN_NET_SIO_ETXEN, sio);
+ tlan_clear_bit(TLAN_NET_SIO_EDATA, sio);
+ tlan_clear_bit(TLAN_NET_SIO_ECLOK, sio);
-} /* TLan_EeSendStart */
+}
- /***************************************************************
- * TLan_EeSendByte
- *
- * Returns:
- * If the correct ack was received, 0, otherwise 1
- * Parms: io_base The IO port base address for the
- * TLAN device with the EEPROM to
- * use.
- * data The 8 bits of information to
- * send to the EEPROM.
- * stop If TLAN_EEPROM_STOP is passed, a
- * stop cycle is sent after the
- * byte is sent after the ack is
- * read.
- *
- * This function sends a byte on the serial EEPROM line,
- * driving the clock to send each bit. The function then
- * reverses transmission direction and reads an acknowledge
- * bit.
- *
- **************************************************************/
+/***************************************************************
+ * tlan_ee_send_byte
+ *
+ * Returns:
+ * If the correct ack was received, 0, otherwise 1
+ * Parms: io_base The IO port base address for the
+ * TLAN device with the EEPROM to
+ * use.
+ * data The 8 bits of information to
+ * send to the EEPROM.
+ * stop If TLAN_EEPROM_STOP is passed, a
+ * stop cycle is sent after the
+ *			byte is sent and the ack is
+ * read.
+ *
+ * This function sends a byte on the serial EEPROM line,
+ * driving the clock to send each bit. The function then
+ * reverses transmission direction and reads an acknowledge
+ * bit.
+ *
+ **************************************************************/
-static int TLan_EeSendByte( u16 io_base, u8 data, int stop )
+static int tlan_ee_send_byte(u16 io_base, u8 data, int stop)
{
int err;
u8 place;
u16 sio;
- outw( TLAN_NET_SIO, io_base + TLAN_DIO_ADR );
+ outw(TLAN_NET_SIO, io_base + TLAN_DIO_ADR);
sio = io_base + TLAN_DIO_DATA + TLAN_NET_SIO;
/* Assume clock is low, tx is enabled; */
- for ( place = 0x80; place != 0; place >>= 1 ) {
- if ( place & data )
- TLan_SetBit( TLAN_NET_SIO_EDATA, sio );
+ for (place = 0x80; place != 0; place >>= 1) {
+ if (place & data)
+ tlan_set_bit(TLAN_NET_SIO_EDATA, sio);
else
- TLan_ClearBit( TLAN_NET_SIO_EDATA, sio );
- TLan_SetBit( TLAN_NET_SIO_ECLOK, sio );
- TLan_ClearBit( TLAN_NET_SIO_ECLOK, sio );
+ tlan_clear_bit(TLAN_NET_SIO_EDATA, sio);
+ tlan_set_bit(TLAN_NET_SIO_ECLOK, sio);
+ tlan_clear_bit(TLAN_NET_SIO_ECLOK, sio);
}
- TLan_ClearBit( TLAN_NET_SIO_ETXEN, sio );
- TLan_SetBit( TLAN_NET_SIO_ECLOK, sio );
- err = TLan_GetBit( TLAN_NET_SIO_EDATA, sio );
- TLan_ClearBit( TLAN_NET_SIO_ECLOK, sio );
- TLan_SetBit( TLAN_NET_SIO_ETXEN, sio );
+ tlan_clear_bit(TLAN_NET_SIO_ETXEN, sio);
+ tlan_set_bit(TLAN_NET_SIO_ECLOK, sio);
+ err = tlan_get_bit(TLAN_NET_SIO_EDATA, sio);
+ tlan_clear_bit(TLAN_NET_SIO_ECLOK, sio);
+ tlan_set_bit(TLAN_NET_SIO_ETXEN, sio);
- if ( ( ! err ) && stop ) {
+ if ((!err) && stop) {
/* STOP, raise data while clock is high */
- TLan_ClearBit( TLAN_NET_SIO_EDATA, sio );
- TLan_SetBit( TLAN_NET_SIO_ECLOK, sio );
- TLan_SetBit( TLAN_NET_SIO_EDATA, sio );
+ tlan_clear_bit(TLAN_NET_SIO_EDATA, sio);
+ tlan_set_bit(TLAN_NET_SIO_ECLOK, sio);
+ tlan_set_bit(TLAN_NET_SIO_EDATA, sio);
}
return err;
-} /* TLan_EeSendByte */
+}
- /***************************************************************
- * TLan_EeReceiveByte
- *
- * Returns:
- * Nothing
- * Parms:
- * io_base The IO port base address for the
- * TLAN device with the EEPROM to
- * use.
- * data An address to a char to hold the
- * data sent from the EEPROM.
- * stop If TLAN_EEPROM_STOP is passed, a
- * stop cycle is sent after the
- * byte is received, and no ack is
- * sent.
- *
- * This function receives 8 bits of data from the EEPROM
- * over the serial link. It then sends and ack bit, or no
- * ack and a stop bit. This function is used to retrieve
- * data after the address of a byte in the EEPROM has been
- * sent.
- *
- **************************************************************/
+/***************************************************************
+ * tlan_ee_receive_byte
+ *
+ * Returns:
+ * Nothing
+ * Parms:
+ * io_base The IO port base address for the
+ * TLAN device with the EEPROM to
+ * use.
+ * data An address to a char to hold the
+ * data sent from the EEPROM.
+ * stop If TLAN_EEPROM_STOP is passed, a
+ * stop cycle is sent after the
+ * byte is received, and no ack is
+ * sent.
+ *
+ * This function receives 8 bits of data from the EEPROM
+ *	over the serial link.  It then sends an ack bit, or no
+ * ack and a stop bit. This function is used to retrieve
+ * data after the address of a byte in the EEPROM has been
+ * sent.
+ *
+ **************************************************************/
-static void TLan_EeReceiveByte( u16 io_base, u8 *data, int stop )
+static void tlan_ee_receive_byte(u16 io_base, u8 *data, int stop)
{
u8 place;
u16 sio;
- outw( TLAN_NET_SIO, io_base + TLAN_DIO_ADR );
+ outw(TLAN_NET_SIO, io_base + TLAN_DIO_ADR);
sio = io_base + TLAN_DIO_DATA + TLAN_NET_SIO;
*data = 0;
/* Assume clock is low, tx is enabled; */
- TLan_ClearBit( TLAN_NET_SIO_ETXEN, sio );
- for ( place = 0x80; place; place >>= 1 ) {
- TLan_SetBit( TLAN_NET_SIO_ECLOK, sio );
- if ( TLan_GetBit( TLAN_NET_SIO_EDATA, sio ) )
+ tlan_clear_bit(TLAN_NET_SIO_ETXEN, sio);
+ for (place = 0x80; place; place >>= 1) {
+ tlan_set_bit(TLAN_NET_SIO_ECLOK, sio);
+ if (tlan_get_bit(TLAN_NET_SIO_EDATA, sio))
*data |= place;
- TLan_ClearBit( TLAN_NET_SIO_ECLOK, sio );
+ tlan_clear_bit(TLAN_NET_SIO_ECLOK, sio);
}
- TLan_SetBit( TLAN_NET_SIO_ETXEN, sio );
- if ( ! stop ) {
- TLan_ClearBit( TLAN_NET_SIO_EDATA, sio ); /* Ack = 0 */
- TLan_SetBit( TLAN_NET_SIO_ECLOK, sio );
- TLan_ClearBit( TLAN_NET_SIO_ECLOK, sio );
+ tlan_set_bit(TLAN_NET_SIO_ETXEN, sio);
+ if (!stop) {
+ tlan_clear_bit(TLAN_NET_SIO_EDATA, sio); /* ack = 0 */
+ tlan_set_bit(TLAN_NET_SIO_ECLOK, sio);
+ tlan_clear_bit(TLAN_NET_SIO_ECLOK, sio);
} else {
- TLan_SetBit( TLAN_NET_SIO_EDATA, sio ); /* No ack = 1 (?) */
- TLan_SetBit( TLAN_NET_SIO_ECLOK, sio );
- TLan_ClearBit( TLAN_NET_SIO_ECLOK, sio );
+ tlan_set_bit(TLAN_NET_SIO_EDATA, sio); /* no ack = 1 (?) */
+ tlan_set_bit(TLAN_NET_SIO_ECLOK, sio);
+ tlan_clear_bit(TLAN_NET_SIO_ECLOK, sio);
/* STOP, raise data while clock is high */
- TLan_ClearBit( TLAN_NET_SIO_EDATA, sio );
- TLan_SetBit( TLAN_NET_SIO_ECLOK, sio );
- TLan_SetBit( TLAN_NET_SIO_EDATA, sio );
+ tlan_clear_bit(TLAN_NET_SIO_EDATA, sio);
+ tlan_set_bit(TLAN_NET_SIO_ECLOK, sio);
+ tlan_set_bit(TLAN_NET_SIO_EDATA, sio);
}
-} /* TLan_EeReceiveByte */
+}
- /***************************************************************
- * TLan_EeReadByte
- *
- * Returns:
- * No error = 0, else, the stage at which the error
- * occurred.
- * Parms:
- * io_base The IO port base address for the
- * TLAN device with the EEPROM to
- * use.
- * ee_addr The address of the byte in the
- * EEPROM whose contents are to be
- * retrieved.
- * data An address to a char to hold the
- * data obtained from the EEPROM.
- *
- * This function reads a byte of information from an byte
- * cell in the EEPROM.
- *
- **************************************************************/
+/***************************************************************
+ * tlan_ee_read_byte
+ *
+ * Returns:
+ * No error = 0, else, the stage at which the error
+ * occurred.
+ * Parms:
+ * io_base The IO port base address for the
+ * TLAN device with the EEPROM to
+ * use.
+ * ee_addr The address of the byte in the
+ * EEPROM whose contents are to be
+ * retrieved.
+ * data An address to a char to hold the
+ * data obtained from the EEPROM.
+ *
+ *	This function reads a byte of information from a byte
+ * cell in the EEPROM.
+ *
+ **************************************************************/
-static int TLan_EeReadByte( struct net_device *dev, u8 ee_addr, u8 *data )
+static int tlan_ee_read_byte(struct net_device *dev, u8 ee_addr, u8 *data)
{
int err;
- TLanPrivateInfo *priv = netdev_priv(dev);
+ struct tlan_priv *priv = netdev_priv(dev);
unsigned long flags = 0;
- int ret=0;
+ int ret = 0;
spin_lock_irqsave(&priv->lock, flags);
- TLan_EeSendStart( dev->base_addr );
- err = TLan_EeSendByte( dev->base_addr, 0xA0, TLAN_EEPROM_ACK );
- if (err)
- {
- ret=1;
+ tlan_ee_send_start(dev->base_addr);
+ err = tlan_ee_send_byte(dev->base_addr, 0xa0, TLAN_EEPROM_ACK);
+ if (err) {
+ ret = 1;
goto fail;
}
- err = TLan_EeSendByte( dev->base_addr, ee_addr, TLAN_EEPROM_ACK );
- if (err)
- {
- ret=2;
+ err = tlan_ee_send_byte(dev->base_addr, ee_addr, TLAN_EEPROM_ACK);
+ if (err) {
+ ret = 2;
goto fail;
}
- TLan_EeSendStart( dev->base_addr );
- err = TLan_EeSendByte( dev->base_addr, 0xA1, TLAN_EEPROM_ACK );
- if (err)
- {
- ret=3;
+ tlan_ee_send_start(dev->base_addr);
+ err = tlan_ee_send_byte(dev->base_addr, 0xa1, TLAN_EEPROM_ACK);
+ if (err) {
+ ret = 3;
goto fail;
}
- TLan_EeReceiveByte( dev->base_addr, data, TLAN_EEPROM_STOP );
+ tlan_ee_receive_byte(dev->base_addr, data, TLAN_EEPROM_STOP);
fail:
spin_unlock_irqrestore(&priv->lock, flags);
return ret;
-} /* TLan_EeReadByte */
+}
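The 0xa0/0xa1 bytes sent above are the 24C02's I2C-style write and read
device selects around a dummy write of the cell address. A hedged sketch
of reading a 6-byte station address this way; in practice ee_addr would
come from the adapter entry's addr_ofs, and the example_* name is made
up:

	static int example_read_mac(struct net_device *dev, u8 ee_addr, u8 *mac)
	{
		int i, err;

		for (i = 0; i < 6; i++) {
			err = tlan_ee_read_byte(dev, ee_addr + i, &mac[i]);
			if (err)
				return err;	/* stage (1-3) that failed */
		}
		return 0;
	}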
diff --git a/drivers/net/tlan.h b/drivers/net/tlan.h
index 3315ced..5fc98a8 100644
--- a/drivers/net/tlan.h
+++ b/drivers/net/tlan.h
@@ -20,8 +20,8 @@
********************************************************************/
-#include <asm/io.h>
-#include <asm/types.h>
+#include <linux/io.h>
+#include <linux/types.h>
#include <linux/netdevice.h>
@@ -40,8 +40,11 @@
#define TLAN_IGNORE 0
#define TLAN_RECORD 1
-#define TLAN_DBG(lvl, format, args...) \
- do { if (debug&lvl) printk(KERN_DEBUG "TLAN: " format, ##args ); } while(0)
+#define TLAN_DBG(lvl, format, args...) \
+ do { \
+ if (debug&lvl) \
+ printk(KERN_DEBUG "TLAN: " format, ##args); \
+ } while (0)
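Call sites are unchanged by the reflow; for instance, from earlier in
this patch:

	TLAN_DBG(TLAN_DEBUG_GNRL, "%s: Powering up PHY.\n", dev->name);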
#define TLAN_DEBUG_GNRL 0x0001
#define TLAN_DEBUG_TX 0x0002
@@ -50,7 +53,8 @@
#define TLAN_DEBUG_PROBE 0x0010
#define TX_TIMEOUT (10*HZ) /* We need time for auto-neg */
-#define MAX_TLAN_BOARDS 8 /* Max number of boards installed at a time */
+#define MAX_TLAN_BOARDS 8 /* Max number of boards installed
+ at a time */
/*****************************************************************
@@ -70,13 +74,13 @@
#define PCI_DEVICE_ID_OLICOM_OC2326 0x0014
#endif
-typedef struct tlan_adapter_entry {
- u16 vendorId;
- u16 deviceId;
- char *deviceLabel;
+struct tlan_adapter_entry {
+ u16 vendor_id;
+ u16 device_id;
+ char *device_label;
u32 flags;
- u16 addrOfs;
-} TLanAdapterEntry;
+ u16 addr_ofs;
+};
#define TLAN_ADAPTER_NONE 0x00000000
#define TLAN_ADAPTER_UNMANAGED_PHY 0x00000001
@@ -129,18 +133,18 @@
#define TLAN_CSTAT_DP_PR 0x0100
-typedef struct tlan_buffer_ref_tag {
+struct tlan_buffer {
u32 count;
u32 address;
-} TLanBufferRef;
+};
-typedef struct tlan_list_tag {
+struct tlan_list {
u32 forward;
- u16 cStat;
- u16 frameSize;
- TLanBufferRef buffer[TLAN_BUFFERS_PER_LIST];
-} TLanList;
+ u16 c_stat;
+ u16 frame_size;
+ struct tlan_buffer buffer[TLAN_BUFFERS_PER_LIST];
+};
typedef u8 TLanBuffer[TLAN_MAX_FRAME_SIZE];
@@ -164,49 +168,49 @@
*
****************************************************************/
-typedef struct tlan_private_tag {
- struct net_device *nextDevice;
- struct pci_dev *pciDev;
+struct tlan_priv {
+ struct net_device *next_device;
+ struct pci_dev *pci_dev;
struct net_device *dev;
- void *dmaStorage;
- dma_addr_t dmaStorageDMA;
- unsigned int dmaSize;
- u8 *padBuffer;
- TLanList *rxList;
- dma_addr_t rxListDMA;
- u8 *rxBuffer;
- dma_addr_t rxBufferDMA;
- u32 rxHead;
- u32 rxTail;
- u32 rxEocCount;
- TLanList *txList;
- dma_addr_t txListDMA;
- u8 *txBuffer;
- dma_addr_t txBufferDMA;
- u32 txHead;
- u32 txInProgress;
- u32 txTail;
- u32 txBusyCount;
- u32 phyOnline;
- u32 timerSetAt;
- u32 timerType;
+ void *dma_storage;
+ dma_addr_t dma_storage_dma;
+ unsigned int dma_size;
+ u8 *pad_buffer;
+ struct tlan_list *rx_list;
+ dma_addr_t rx_list_dma;
+ u8 *rx_buffer;
+ dma_addr_t rx_buffer_dma;
+ u32 rx_head;
+ u32 rx_tail;
+ u32 rx_eoc_count;
+ struct tlan_list *tx_list;
+ dma_addr_t tx_list_dma;
+ u8 *tx_buffer;
+ dma_addr_t tx_buffer_dma;
+ u32 tx_head;
+ u32 tx_in_progress;
+ u32 tx_tail;
+ u32 tx_busy_count;
+ u32 phy_online;
+ u32 timer_set_at;
+ u32 timer_type;
struct timer_list timer;
struct board *adapter;
- u32 adapterRev;
+ u32 adapter_rev;
u32 aui;
u32 debug;
u32 duplex;
u32 phy[2];
- u32 phyNum;
+ u32 phy_num;
u32 speed;
- u8 tlanRev;
- u8 tlanFullDuplex;
+ u8 tlan_rev;
+ u8 tlan_full_duplex;
spinlock_t lock;
u8 link;
u8 is_eisa;
struct work_struct tlan_tqueue;
u8 neg_be_verbose;
-} TLanPrivateInfo;
+};
@@ -247,7 +251,7 @@
****************************************************************/
#define TLAN_HOST_CMD 0x00
-#define TLAN_HC_GO 0x80000000
+#define TLAN_HC_GO 0x80000000
#define TLAN_HC_STOP 0x40000000
#define TLAN_HC_ACK 0x20000000
#define TLAN_HC_CS_MASK 0x1FE00000
@@ -283,7 +287,7 @@
#define TLAN_NET_CMD_TRFRAM 0x02
#define TLAN_NET_CMD_TXPACE 0x01
#define TLAN_NET_SIO 0x01
-#define TLAN_NET_SIO_MINTEN 0x80
+#define TLAN_NET_SIO_MINTEN 0x80
#define TLAN_NET_SIO_ECLOK 0x40
#define TLAN_NET_SIO_ETXEN 0x20
#define TLAN_NET_SIO_EDATA 0x10
@@ -304,7 +308,7 @@
#define TLAN_NET_MASK_MASK4 0x10
#define TLAN_NET_MASK_RSRVD 0x0F
#define TLAN_NET_CONFIG 0x04
-#define TLAN_NET_CFG_RCLK 0x8000
+#define TLAN_NET_CFG_RCLK 0x8000
#define TLAN_NET_CFG_TCLK 0x4000
#define TLAN_NET_CFG_BIT 0x2000
#define TLAN_NET_CFG_RXCRC 0x1000
@@ -372,7 +376,7 @@
/* Generic MII/PHY Registers */
#define MII_GEN_CTL 0x00
-#define MII_GC_RESET 0x8000
+#define MII_GC_RESET 0x8000
#define MII_GC_LOOPBK 0x4000
#define MII_GC_SPEEDSEL 0x2000
#define MII_GC_AUTOENB 0x1000
@@ -397,9 +401,9 @@
#define MII_GS_EXTCAP 0x0001
#define MII_GEN_ID_HI 0x02
#define MII_GEN_ID_LO 0x03
-#define MII_GIL_OUI 0xFC00
-#define MII_GIL_MODEL 0x03F0
-#define MII_GIL_REVISION 0x000F
+#define MII_GIL_OUI 0xFC00
+#define MII_GIL_MODEL 0x03F0
+#define MII_GIL_REVISION 0x000F
#define MII_AN_ADV 0x04
#define MII_AN_LPA 0x05
#define MII_AN_EXP 0x06
@@ -408,7 +412,7 @@
#define TLAN_TLPHY_ID 0x10
#define TLAN_TLPHY_CTL 0x11
-#define TLAN_TC_IGLINK 0x8000
+#define TLAN_TC_IGLINK 0x8000
#define TLAN_TC_SWAPOL 0x4000
#define TLAN_TC_AUISEL 0x2000
#define TLAN_TC_SQEEN 0x1000
@@ -435,41 +439,41 @@
#define LEVEL1_ID1 0x7810
#define LEVEL1_ID2 0x0000
-#define CIRC_INC( a, b ) if ( ++a >= b ) a = 0
+#define CIRC_INC(a, b) if (++a >= b) a = 0
/* Routines to access internal registers. */
-static inline u8 TLan_DioRead8(u16 base_addr, u16 internal_addr)
+static inline u8 tlan_dio_read8(u16 base_addr, u16 internal_addr)
{
outw(internal_addr, base_addr + TLAN_DIO_ADR);
return inb((base_addr + TLAN_DIO_DATA) + (internal_addr & 0x3));
-} /* TLan_DioRead8 */
+}
-static inline u16 TLan_DioRead16(u16 base_addr, u16 internal_addr)
+static inline u16 tlan_dio_read16(u16 base_addr, u16 internal_addr)
{
outw(internal_addr, base_addr + TLAN_DIO_ADR);
return inw((base_addr + TLAN_DIO_DATA) + (internal_addr & 0x2));
-} /* TLan_DioRead16 */
+}
-static inline u32 TLan_DioRead32(u16 base_addr, u16 internal_addr)
+static inline u32 tlan_dio_read32(u16 base_addr, u16 internal_addr)
{
outw(internal_addr, base_addr + TLAN_DIO_ADR);
return inl(base_addr + TLAN_DIO_DATA);
-} /* TLan_DioRead32 */
+}
-static inline void TLan_DioWrite8(u16 base_addr, u16 internal_addr, u8 data)
+static inline void tlan_dio_write8(u16 base_addr, u16 internal_addr, u8 data)
{
outw(internal_addr, base_addr + TLAN_DIO_ADR);
outb(data, base_addr + TLAN_DIO_DATA + (internal_addr & 0x3));
@@ -479,7 +483,7 @@
-static inline void TLan_DioWrite16(u16 base_addr, u16 internal_addr, u16 data)
+static inline void tlan_dio_write16(u16 base_addr, u16 internal_addr, u16 data)
{
outw(internal_addr, base_addr + TLAN_DIO_ADR);
outw(data, base_addr + TLAN_DIO_DATA + (internal_addr & 0x2));
@@ -489,16 +493,16 @@
-static inline void TLan_DioWrite32(u16 base_addr, u16 internal_addr, u32 data)
+static inline void tlan_dio_write32(u16 base_addr, u16 internal_addr, u32 data)
{
outw(internal_addr, base_addr + TLAN_DIO_ADR);
outl(data, base_addr + TLAN_DIO_DATA + (internal_addr & 0x2));
}
-#define TLan_ClearBit( bit, port ) outb_p(inb_p(port) & ~bit, port)
-#define TLan_GetBit( bit, port ) ((int) (inb_p(port) & bit))
-#define TLan_SetBit( bit, port ) outb_p(inb_p(port) | bit, port)
+#define tlan_clear_bit(bit, port) outb_p(inb_p(port) & ~bit, port)
+#define tlan_get_bit(bit, port) ((int) (inb_p(port) & bit))
+#define tlan_set_bit(bit, port) outb_p(inb_p(port) | bit, port)
/*
* given 6 bytes, view them as 8 6-bit numbers and return the XOR of those
@@ -506,37 +510,37 @@
*
* The original code was:
*
- * u32 xor( u32 a, u32 b ) { return ( ( a && ! b ) || ( ! a && b ) ); }
+ *	u32 xor(u32 a, u32 b) { return ((a && !b) || (!a && b)); }
*
- * #define XOR8( a, b, c, d, e, f, g, h ) \
- * xor( a, xor( b, xor( c, xor( d, xor( e, xor( f, xor( g, h ) ) ) ) ) ) )
- * #define DA( a, bit ) ( ( (u8) a[bit/8] ) & ( (u8) ( 1 << bit%8 ) ) )
+ * #define XOR8(a, b, c, d, e, f, g, h) \
+ *	xor(a, xor(b, xor(c, xor(d, xor(e, xor(f, xor(g, h)))))))
+ *	#define DA(a, bit) (((u8) a[bit/8]) & ((u8) (1 << bit%8)))
*
- * hash = XOR8( DA(a,0), DA(a, 6), DA(a,12), DA(a,18), DA(a,24),
- * DA(a,30), DA(a,36), DA(a,42) );
- * hash |= XOR8( DA(a,1), DA(a, 7), DA(a,13), DA(a,19), DA(a,25),
- * DA(a,31), DA(a,37), DA(a,43) ) << 1;
- * hash |= XOR8( DA(a,2), DA(a, 8), DA(a,14), DA(a,20), DA(a,26),
- * DA(a,32), DA(a,38), DA(a,44) ) << 2;
- * hash |= XOR8( DA(a,3), DA(a, 9), DA(a,15), DA(a,21), DA(a,27),
- * DA(a,33), DA(a,39), DA(a,45) ) << 3;
- * hash |= XOR8( DA(a,4), DA(a,10), DA(a,16), DA(a,22), DA(a,28),
- * DA(a,34), DA(a,40), DA(a,46) ) << 4;
- * hash |= XOR8( DA(a,5), DA(a,11), DA(a,17), DA(a,23), DA(a,29),
- * DA(a,35), DA(a,41), DA(a,47) ) << 5;
+ * hash = XOR8(DA(a,0), DA(a, 6), DA(a,12), DA(a,18), DA(a,24),
+ * DA(a,30), DA(a,36), DA(a,42));
+ * hash |= XOR8(DA(a,1), DA(a, 7), DA(a,13), DA(a,19), DA(a,25),
+ * DA(a,31), DA(a,37), DA(a,43)) << 1;
+ * hash |= XOR8(DA(a,2), DA(a, 8), DA(a,14), DA(a,20), DA(a,26),
+ * DA(a,32), DA(a,38), DA(a,44)) << 2;
+ * hash |= XOR8(DA(a,3), DA(a, 9), DA(a,15), DA(a,21), DA(a,27),
+ * DA(a,33), DA(a,39), DA(a,45)) << 3;
+ * hash |= XOR8(DA(a,4), DA(a,10), DA(a,16), DA(a,22), DA(a,28),
+ * DA(a,34), DA(a,40), DA(a,46)) << 4;
+ * hash |= XOR8(DA(a,5), DA(a,11), DA(a,17), DA(a,23), DA(a,29),
+ * DA(a,35), DA(a,41), DA(a,47)) << 5;
*
*/
-static inline u32 TLan_HashFunc( const u8 *a )
+static inline u32 tlan_hash_func(const u8 *a)
{
- u8 hash;
+ u8 hash;
- hash = (a[0]^a[3]); /* & 077 */
- hash ^= ((a[0]^a[3])>>6); /* & 003 */
- hash ^= ((a[1]^a[4])<<2); /* & 074 */
- hash ^= ((a[1]^a[4])>>4); /* & 017 */
- hash ^= ((a[2]^a[5])<<4); /* & 060 */
- hash ^= ((a[2]^a[5])>>2); /* & 077 */
+ hash = (a[0]^a[3]); /* & 077 */
+ hash ^= ((a[0]^a[3])>>6); /* & 003 */
+ hash ^= ((a[1]^a[4])<<2); /* & 074 */
+ hash ^= ((a[1]^a[4])>>4); /* & 017 */
+ hash ^= ((a[2]^a[5])<<4); /* & 060 */
+ hash ^= ((a[2]^a[5])>>2); /* & 077 */
- return hash & 077;
+ return hash & 077;
}
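The unrolled XOR chain above is equivalent to a plain loop over the 48
address bits; a hedged restatement that is handy for checking the shift
amounts (the example_* name is not in the header):

	/* View the address, LSB-first within each byte, as eight 6-bit
	 * numbers and XOR them together; the result is a 0-63 bucket.
	 */
	static inline u32 example_hash_func(const u8 *a)
	{
		u32 hash = 0;
		int bit;

		for (bit = 0; bit < 48; bit++)
			hash ^= ((a[bit / 8] >> (bit % 8)) & 1) << (bit % 6);
		return hash & 077;
	}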
#endif
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index b100bd5..55786a0 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1142,7 +1142,7 @@
* privs required. */
static int set_offload(struct net_device *dev, unsigned long arg)
{
- unsigned int old_features, features;
+ u32 old_features, features;
old_features = dev->features;
/* Unset features, set them as we chew on the arg. */
diff --git a/drivers/net/typhoon.c b/drivers/net/typhoon.c
index a3c46f6..7fa5ec2 100644
--- a/drivers/net/typhoon.c
+++ b/drivers/net/typhoon.c
@@ -123,12 +123,11 @@
#include <linux/in6.h>
#include <linux/dma-mapping.h>
#include <linux/firmware.h>
-#include <generated/utsrelease.h>
#include "typhoon.h"
MODULE_AUTHOR("David Dillow <dave@thedillows.org>");
-MODULE_VERSION(UTS_RELEASE);
+MODULE_VERSION("1.0");
MODULE_LICENSE("GPL");
MODULE_FIRMWARE(FIRMWARE_NAME);
MODULE_DESCRIPTION("3Com Typhoon Family (3C990, 3CR990, and variants)");
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index cc83fa7..105d7f0 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -403,17 +403,6 @@
if (tb[IFLA_ADDRESS] == NULL)
random_ether_addr(dev->dev_addr);
- if (tb[IFLA_IFNAME])
- nla_strlcpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ);
- else
- snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d");
-
- if (strchr(dev->name, '%')) {
- err = dev_alloc_name(dev, dev->name);
- if (err < 0)
- goto err_alloc_name;
- }
-
err = register_netdevice(dev);
if (err < 0)
goto err_register_dev;
@@ -433,7 +422,6 @@
err_register_dev:
/* nothing to do */
-err_alloc_name:
err_configure_peer:
unregister_netdevice(peer);
return err;
diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index 09cac70..0d6fec6 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -2923,6 +2923,7 @@
static int velocity_set_wol(struct velocity_info *vptr)
{
struct mac_regs __iomem *regs = vptr->mac_regs;
+ enum speed_opt spd_dpx = vptr->options.spd_dpx;
static u8 buf[256];
int i;
@@ -2968,6 +2969,12 @@
	writew(0x0FFF, &regs->WOLSRClr);
+ if (spd_dpx == SPD_DPX_1000_FULL)
+ goto mac_done;
+
+ if (spd_dpx != SPD_DPX_AUTO)
+ goto advertise_done;
+
if (vptr->mii_status & VELOCITY_AUTONEG_ENABLE) {
if (PHYID_GET_PHY_ID(vptr->phy_id) == PHYID_CICADA_CS8201)
MII_REG_BITS_ON(AUXCR_MDPPS, MII_NCONFIG, vptr->mac_regs);
@@ -2978,6 +2985,7 @@
if (vptr->mii_status & VELOCITY_SPEED_1000)
MII_REG_BITS_ON(BMCR_ANRESTART, MII_BMCR, vptr->mac_regs);
+advertise_done:
	BYTE_REG_BITS_ON(CHIPGCR_FCMODE, &regs->CHIPGCR);
{
@@ -2987,6 +2995,7 @@
		writeb(GCR, &regs->CHIPGCR);
}
+mac_done:
	BYTE_REG_BITS_OFF(ISR_PWEI, &regs->ISR);
/* Turn on SWPTAG just before entering power mode */
	BYTE_REG_BITS_ON(STICKHW_SWPTAG, &regs->STICKHW);
diff --git a/drivers/net/via-velocity.h b/drivers/net/via-velocity.h
index aa2e69b..d722753 100644
--- a/drivers/net/via-velocity.h
+++ b/drivers/net/via-velocity.h
@@ -361,7 +361,7 @@
#define MAC_REG_CHIPGSR 0x9C
#define MAC_REG_TESTCFG 0x9D
#define MAC_REG_DEBUG 0x9E
-#define MAC_REG_CHIPGCR 0x9F
+#define MAC_REG_CHIPGCR 0x9F /* Chip Operation and Diagnostic Control */
#define MAC_REG_WOLCR0_SET 0xA0
#define MAC_REG_WOLCR1_SET 0xA1
#define MAC_REG_PWCFG_SET 0xA2
@@ -848,10 +848,10 @@
* Bits in CHIPGCR register
*/
-#define CHIPGCR_FCGMII 0x80 /* enable GMII mode */
-#define CHIPGCR_FCFDX 0x40
+#define CHIPGCR_FCGMII 0x80 /* force GMII (else MII only) */
+#define CHIPGCR_FCFDX 0x40 /* force full duplex */
#define CHIPGCR_FCRESV 0x20
-#define CHIPGCR_FCMODE 0x10
+#define CHIPGCR_FCMODE 0x10 /* enable MAC forced mode */
#define CHIPGCR_LPSOPT 0x08
#define CHIPGCR_TM1US 0x04
#define CHIPGCR_TM0US 0x02
diff --git a/drivers/net/vxge/vxge-config.c b/drivers/net/vxge/vxge-config.c
index 01c05f5..77097e3 100644
--- a/drivers/net/vxge/vxge-config.c
+++ b/drivers/net/vxge/vxge-config.c
@@ -387,8 +387,8 @@
data1 = steer_ctrl = 0;
status = vxge_hw_vpath_fw_api(vpath,
- VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_FW_MEMO,
VXGE_HW_FW_API_GET_EPROM_REV,
+ VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_FW_MEMO,
0, &data0, &data1, &steer_ctrl);
if (status != VXGE_HW_OK)
break;
@@ -2868,6 +2868,8 @@
ring->rxd_init = attr->rxd_init;
ring->rxd_term = attr->rxd_term;
ring->buffer_mode = config->buffer_mode;
+ ring->tim_rti_cfg1_saved = vp->vpath->tim_rti_cfg1_saved;
+ ring->tim_rti_cfg3_saved = vp->vpath->tim_rti_cfg3_saved;
ring->rxds_limit = config->rxds_limit;
ring->rxd_size = vxge_hw_ring_rxd_size_get(config->buffer_mode);
@@ -3511,6 +3513,8 @@
/* apply "interrupts per txdl" attribute */
fifo->interrupt_type = VXGE_HW_FIFO_TXD_INT_TYPE_UTILZ;
+ fifo->tim_tti_cfg1_saved = vpath->tim_tti_cfg1_saved;
+ fifo->tim_tti_cfg3_saved = vpath->tim_tti_cfg3_saved;
if (fifo->config->intr)
fifo->interrupt_type = VXGE_HW_FIFO_TXD_INT_TYPE_PER_LIST;
@@ -4377,6 +4381,8 @@
}
writeq(val64, &vp_reg->tim_cfg1_int_num[VXGE_HW_VPATH_INTR_TX]);
+ vpath->tim_tti_cfg1_saved = val64;
+
val64 = readq(&vp_reg->tim_cfg2_int_num[VXGE_HW_VPATH_INTR_TX]);
if (config->tti.uec_a != VXGE_HW_USE_FLASH_DEFAULT) {
@@ -4433,6 +4439,7 @@
}
writeq(val64, &vp_reg->tim_cfg3_int_num[VXGE_HW_VPATH_INTR_TX]);
+ vpath->tim_tti_cfg3_saved = val64;
}
if (config->ring.enable == VXGE_HW_RING_ENABLE) {
@@ -4481,6 +4488,8 @@
}
writeq(val64, &vp_reg->tim_cfg1_int_num[VXGE_HW_VPATH_INTR_RX]);
+ vpath->tim_rti_cfg1_saved = val64;
+
val64 = readq(&vp_reg->tim_cfg2_int_num[VXGE_HW_VPATH_INTR_RX]);
if (config->rti.uec_a != VXGE_HW_USE_FLASH_DEFAULT) {
@@ -4537,6 +4546,7 @@
}
writeq(val64, &vp_reg->tim_cfg3_int_num[VXGE_HW_VPATH_INTR_RX]);
+ vpath->tim_rti_cfg3_saved = val64;
}
val64 = 0;
@@ -4555,26 +4565,6 @@
return status;
}
-void vxge_hw_vpath_tti_ci_set(struct __vxge_hw_device *hldev, u32 vp_id)
-{
- struct __vxge_hw_virtualpath *vpath;
- struct vxge_hw_vpath_reg __iomem *vp_reg;
- struct vxge_hw_vp_config *config;
- u64 val64;
-
- vpath = &hldev->virtual_paths[vp_id];
- vp_reg = vpath->vp_reg;
- config = vpath->vp_config;
-
- if (config->fifo.enable == VXGE_HW_FIFO_ENABLE &&
- config->tti.timer_ci_en != VXGE_HW_TIM_TIMER_CI_ENABLE) {
- config->tti.timer_ci_en = VXGE_HW_TIM_TIMER_CI_ENABLE;
- val64 = readq(&vp_reg->tim_cfg1_int_num[VXGE_HW_VPATH_INTR_TX]);
- val64 |= VXGE_HW_TIM_CFG1_INT_NUM_TIMER_CI;
- writeq(val64, &vp_reg->tim_cfg1_int_num[VXGE_HW_VPATH_INTR_TX]);
- }
-}
-
/*
* __vxge_hw_vpath_initialize
* This routine is the final phase of init which initializes the
diff --git a/drivers/net/vxge/vxge-config.h b/drivers/net/vxge/vxge-config.h
index e249e28..3c53aa7 100644
--- a/drivers/net/vxge/vxge-config.h
+++ b/drivers/net/vxge/vxge-config.h
@@ -682,6 +682,10 @@
u32 vsport_number;
u32 max_kdfc_db;
u32 max_nofl_db;
+ u64 tim_tti_cfg1_saved;
+ u64 tim_tti_cfg3_saved;
+ u64 tim_rti_cfg1_saved;
+ u64 tim_rti_cfg3_saved;
struct __vxge_hw_ring *____cacheline_aligned ringh;
struct __vxge_hw_fifo *____cacheline_aligned fifoh;
@@ -921,6 +925,9 @@
u32 doorbell_cnt;
u32 total_db_cnt;
u64 rxds_limit;
+ u32 rtimer;
+ u64 tim_rti_cfg1_saved;
+ u64 tim_rti_cfg3_saved;
enum vxge_hw_status (*callback)(
struct __vxge_hw_ring *ringh,
@@ -1000,6 +1007,9 @@
u32 per_txdl_space;
u32 vp_id;
u32 tx_intr_num;
+ u32 rtimer;
+ u64 tim_tti_cfg1_saved;
+ u64 tim_tti_cfg3_saved;
enum vxge_hw_status (*callback)(
struct __vxge_hw_fifo *fifo_handle,
diff --git a/drivers/net/vxge/vxge-main.c b/drivers/net/vxge/vxge-main.c
index c81a651..e40f619 100644
--- a/drivers/net/vxge/vxge-main.c
+++ b/drivers/net/vxge/vxge-main.c
@@ -371,9 +371,6 @@
struct vxge_hw_ring_rxd_info ext_info;
vxge_debug_entryexit(VXGE_TRACE, "%s: %s:%d",
ring->ndev->name, __func__, __LINE__);
- ring->pkts_processed = 0;
-
- vxge_hw_ring_replenish(ringh);
do {
prefetch((char *)dtr + L1_CACHE_BYTES);
@@ -1588,6 +1585,36 @@
return ret;
}
+/* Configure CI */
+static void vxge_config_ci_for_tti_rti(struct vxgedev *vdev)
+{
+ int i = 0;
+
+ /* Enable CI for RTI */
+ if (vdev->config.intr_type == MSI_X) {
+ for (i = 0; i < vdev->no_of_vpath; i++) {
+ struct __vxge_hw_ring *hw_ring;
+
+ hw_ring = vdev->vpaths[i].ring.handle;
+ vxge_hw_vpath_dynamic_rti_ci_set(hw_ring);
+ }
+ }
+
+ /* Enable CI for TTI */
+ for (i = 0; i < vdev->no_of_vpath; i++) {
+ struct __vxge_hw_fifo *hw_fifo = vdev->vpaths[i].fifo.handle;
+ vxge_hw_vpath_tti_ci_set(hw_fifo);
+ /*
+ * For INTA (with or without NAPI), set CI on only one
+ * vpath; there is only one free-running timer.
+ */
+ if ((vdev->config.intr_type == INTA) && (i == 0))
+ break;
+ }
+
+ return;
+}
+
static int do_vxge_reset(struct vxgedev *vdev, int event)
{
enum vxge_hw_status status;
@@ -1753,6 +1780,9 @@
netif_tx_wake_all_queues(vdev->ndev);
}
+ /* configure CI */
+ vxge_config_ci_for_tti_rti(vdev);
+
out:
vxge_debug_entryexit(VXGE_TRACE,
"%s:%d Exiting...", __func__, __LINE__);
@@ -1793,22 +1823,29 @@
*/
static int vxge_poll_msix(struct napi_struct *napi, int budget)
{
- struct vxge_ring *ring =
- container_of(napi, struct vxge_ring, napi);
+ struct vxge_ring *ring = container_of(napi, struct vxge_ring, napi);
+ int pkts_processed;
int budget_org = budget;
- ring->budget = budget;
+ ring->budget = budget;
+ ring->pkts_processed = 0;
vxge_hw_vpath_poll_rx(ring->handle);
+ pkts_processed = ring->pkts_processed;
if (ring->pkts_processed < budget_org) {
napi_complete(napi);
+
/* Re enable the Rx interrupts for the vpath */
vxge_hw_channel_msix_unmask(
(struct __vxge_hw_channel *)ring->handle,
ring->rx_vector_no);
+ mmiowb();
}
- return ring->pkts_processed;
+ /* Return the copy in the local variable in case the interrupt
+ * fires right after clearing the MSI-X vector above and
+ * preempts this NAPI thread. */
+ return pkts_processed;
}
static int vxge_poll_inta(struct napi_struct *napi, int budget)
@@ -1824,6 +1861,7 @@
for (i = 0; i < vdev->no_of_vpath; i++) {
ring = &vdev->vpaths[i].ring;
ring->budget = budget;
+ ring->pkts_processed = 0;
vxge_hw_vpath_poll_rx(ring->handle);
pkts_processed += ring->pkts_processed;
budget -= ring->pkts_processed;
@@ -2054,6 +2092,7 @@
netdev_get_tx_queue(vdev->ndev, 0);
vpath->fifo.indicate_max_pkts =
vdev->config.fifo_indicate_max_pkts;
+ vpath->fifo.tx_vector_no = 0;
vpath->ring.rx_vector_no = 0;
vpath->ring.rx_csum = vdev->rx_csum;
vpath->ring.rx_hwts = vdev->rx_hwts;
@@ -2079,6 +2118,61 @@
return VXGE_HW_OK;
}
+/**
+ * adaptive_coalesce_tx_interrupts - Changes the interrupt coalescing
+ * if the interrupts are not within a range
+ * @fifo: pointer to transmit fifo structure
+ * Description: The function changes the boundary timer and restriction timer
+ * values depending on the traffic
+ * Return Value: None
+ */
+static void adaptive_coalesce_tx_interrupts(struct vxge_fifo *fifo)
+{
+ fifo->interrupt_count++;
+ if (jiffies > fifo->jiffies + HZ / 100) {
+ struct __vxge_hw_fifo *hw_fifo = fifo->handle;
+
+ fifo->jiffies = jiffies;
+ if (fifo->interrupt_count > VXGE_T1A_MAX_TX_INTERRUPT_COUNT &&
+ hw_fifo->rtimer != VXGE_TTI_RTIMER_ADAPT_VAL) {
+ hw_fifo->rtimer = VXGE_TTI_RTIMER_ADAPT_VAL;
+ vxge_hw_vpath_dynamic_tti_rtimer_set(hw_fifo);
+ } else if (hw_fifo->rtimer != 0) {
+ hw_fifo->rtimer = 0;
+ vxge_hw_vpath_dynamic_tti_rtimer_set(hw_fifo);
+ }
+ fifo->interrupt_count = 0;
+ }
+}
+
+/**
+ * adaptive_coalesce_rx_interrupts - Changes the interrupt coalescing
+ * if the interrupts are not within a range
+ * @ring: pointer to receive ring structure
+ * Description: The function increases or decreases the packet count within
+ * the ranges of traffic utilization if the interrupts due to this ring are
+ * not within a fixed range.
+ * Return Value: None
+ */
+static void adaptive_coalesce_rx_interrupts(struct vxge_ring *ring)
+{
+ ring->interrupt_count++;
+ if (jiffies > ring->jiffies + HZ / 100) {
+ struct __vxge_hw_ring *hw_ring = ring->handle;
+
+ ring->jiffies = jiffies;
+ if (ring->interrupt_count > VXGE_T1A_MAX_INTERRUPT_COUNT &&
+ hw_ring->rtimer != VXGE_RTI_RTIMER_ADAPT_VAL) {
+ hw_ring->rtimer = VXGE_RTI_RTIMER_ADAPT_VAL;
+ vxge_hw_vpath_dynamic_rti_rtimer_set(hw_ring);
+ } else if (hw_ring->rtimer != 0) {
+ hw_ring->rtimer = 0;
+ vxge_hw_vpath_dynamic_rti_rtimer_set(hw_ring);
+ }
+ ring->interrupt_count = 0;
+ }
+}
+
/*
* vxge_isr_napi
* @irq: the irq of the device.
@@ -2139,24 +2233,39 @@
#ifdef CONFIG_PCI_MSI
-static irqreturn_t
-vxge_tx_msix_handle(int irq, void *dev_id)
+static irqreturn_t vxge_tx_msix_handle(int irq, void *dev_id)
{
struct vxge_fifo *fifo = (struct vxge_fifo *)dev_id;
+ adaptive_coalesce_tx_interrupts(fifo);
+
+ vxge_hw_channel_msix_mask((struct __vxge_hw_channel *)fifo->handle,
+ fifo->tx_vector_no);
+
+ vxge_hw_channel_msix_clear((struct __vxge_hw_channel *)fifo->handle,
+ fifo->tx_vector_no);
+
VXGE_COMPLETE_VPATH_TX(fifo);
+ vxge_hw_channel_msix_unmask((struct __vxge_hw_channel *)fifo->handle,
+ fifo->tx_vector_no);
+
+ mmiowb();
+
return IRQ_HANDLED;
}
-static irqreturn_t
-vxge_rx_msix_napi_handle(int irq, void *dev_id)
+static irqreturn_t vxge_rx_msix_napi_handle(int irq, void *dev_id)
{
struct vxge_ring *ring = (struct vxge_ring *)dev_id;
- /* MSIX_IDX for Rx is 1 */
+ adaptive_coalesce_rx_interrupts(ring);
+
vxge_hw_channel_msix_mask((struct __vxge_hw_channel *)ring->handle,
- ring->rx_vector_no);
+ ring->rx_vector_no);
+
+ vxge_hw_channel_msix_clear((struct __vxge_hw_channel *)ring->handle,
+ ring->rx_vector_no);
napi_schedule(&ring->napi);
return IRQ_HANDLED;
@@ -2173,14 +2282,20 @@
VXGE_HW_VPATH_MSIX_ACTIVE) + VXGE_ALARM_MSIX_ID;
for (i = 0; i < vdev->no_of_vpath; i++) {
+ /* Reduce the chance of losing alarm interrupts by masking
+ * the vector. A pending bit will be set if an alarm is
+ * generated and, on unmask, the interrupt will fire.
+ */
vxge_hw_vpath_msix_mask(vdev->vpaths[i].handle, msix_id);
+ vxge_hw_vpath_msix_clear(vdev->vpaths[i].handle, msix_id);
+ mmiowb();
status = vxge_hw_vpath_alarm_process(vdev->vpaths[i].handle,
vdev->exec_mode);
if (status == VXGE_HW_OK) {
-
vxge_hw_vpath_msix_unmask(vdev->vpaths[i].handle,
- msix_id);
+ msix_id);
+ mmiowb();
continue;
}
vxge_debug_intr(VXGE_ERR,
@@ -2299,6 +2414,9 @@
vpath->ring.rx_vector_no = (vpath->device_id *
VXGE_HW_VPATH_MSIX_ACTIVE) + 1;
+ vpath->fifo.tx_vector_no = (vpath->device_id *
+ VXGE_HW_VPATH_MSIX_ACTIVE);
+
vxge_hw_vpath_msix_set(vpath->handle, tim_msix_id,
VXGE_ALARM_MSIX_ID);
}
@@ -2474,8 +2592,9 @@
"%s:vxge:INTA", vdev->ndev->name);
vxge_hw_device_set_intr_type(vdev->devh,
VXGE_HW_INTR_MODE_IRQLINE);
- vxge_hw_vpath_tti_ci_set(vdev->devh,
- vdev->vpaths[0].device_id);
+
+ vxge_hw_vpath_tti_ci_set(vdev->vpaths[0].fifo.handle);
+
ret = request_irq((int) vdev->pdev->irq,
vxge_isr_napi,
IRQF_SHARED, vdev->desc[0], vdev);
@@ -2745,6 +2864,10 @@
}
netif_tx_start_all_queues(vdev->ndev);
+
+ /* configure CI */
+ vxge_config_ci_for_tti_rti(vdev);
+
goto out0;
out2:
@@ -3348,7 +3471,7 @@
vxge_debug_init(VXGE_ERR,
"%s: vpath memory allocation failed",
vdev->ndev->name);
- ret = -ENODEV;
+ ret = -ENOMEM;
goto _out1;
}
@@ -3369,11 +3492,11 @@
if (vdev->config.gro_enable)
ndev->features |= NETIF_F_GRO;
- if (register_netdev(ndev)) {
+ ret = register_netdev(ndev);
+ if (ret) {
vxge_debug_init(vxge_hw_device_trace_level_get(hldev),
"%s: %s : device registration failed!",
ndev->name, __func__);
- ret = -ENODEV;
goto _out2;
}
@@ -3444,6 +3567,11 @@
/* in 2.6 will call stop() if device is up */
unregister_netdev(dev);
+ kfree(vdev->vpaths);
+
+ /* we are safe to free it now */
+ free_netdev(dev);
+
vxge_debug_init(vdev->level_trace, "%s: ethernet device unregistered",
buf);
vxge_debug_entryexit(vdev->level_trace, "%s: %s:%d Exiting...", buf,
@@ -3799,7 +3927,7 @@
break;
case MSI_X:
- device_config->intr_mode = VXGE_HW_INTR_MODE_MSIX;
+ device_config->intr_mode = VXGE_HW_INTR_MODE_MSIX_ONE_SHOT;
break;
}
@@ -4335,10 +4463,10 @@
goto _exit1;
}
- if (pci_request_region(pdev, 0, VXGE_DRIVER_NAME)) {
+ ret = pci_request_region(pdev, 0, VXGE_DRIVER_NAME);
+ if (ret) {
vxge_debug_init(VXGE_ERR,
"%s : request regions failed", __func__);
- ret = -ENODEV;
goto _exit1;
}
@@ -4446,7 +4574,7 @@
if (!img[i].is_valid)
break;
vxge_debug_init(VXGE_TRACE, "%s: EPROM %d, version "
- "%d.%d.%d.%d\n", VXGE_DRIVER_NAME, i,
+ "%d.%d.%d.%d", VXGE_DRIVER_NAME, i,
VXGE_EPROM_IMG_MAJOR(img[i].version),
VXGE_EPROM_IMG_MINOR(img[i].version),
VXGE_EPROM_IMG_FIX(img[i].version),
@@ -4643,8 +4771,9 @@
_exit5:
vxge_device_unregister(hldev);
_exit4:
- pci_disable_sriov(pdev);
+ pci_set_drvdata(pdev, NULL);
vxge_hw_device_terminate(hldev);
+ pci_disable_sriov(pdev);
_exit3:
iounmap(attr.bar0);
_exit2:
@@ -4655,7 +4784,7 @@
kfree(ll_config);
kfree(device_config);
driver_config->config_dev_cnt--;
- pci_set_drvdata(pdev, NULL);
+ driver_config->total_dev_cnt--;
return ret;
}
@@ -4668,45 +4797,34 @@
static void __devexit vxge_remove(struct pci_dev *pdev)
{
struct __vxge_hw_device *hldev;
- struct vxgedev *vdev = NULL;
- struct net_device *dev;
- int i = 0;
+ struct vxgedev *vdev;
+ int i;
hldev = pci_get_drvdata(pdev);
-
if (hldev == NULL)
return;
- dev = hldev->ndev;
- vdev = netdev_priv(dev);
+ vdev = netdev_priv(hldev->ndev);
vxge_debug_entryexit(vdev->level_trace, "%s:%d", __func__, __LINE__);
-
vxge_debug_init(vdev->level_trace, "%s : removing PCI device...",
__func__);
- vxge_device_unregister(hldev);
- for (i = 0; i < vdev->no_of_vpath; i++) {
+ for (i = 0; i < vdev->no_of_vpath; i++)
vxge_free_mac_add_list(&vdev->vpaths[i]);
- vdev->vpaths[i].mcast_addr_cnt = 0;
- vdev->vpaths[i].mac_addr_cnt = 0;
- }
- kfree(vdev->vpaths);
-
+ vxge_device_unregister(hldev);
+ pci_set_drvdata(pdev, NULL);
+ /* Do not call pci_disable_sriov here, as it will break child devices */
+ vxge_hw_device_terminate(hldev);
iounmap(vdev->bar0);
-
- /* we are safe to free it now */
- free_netdev(dev);
+ pci_release_region(pdev, 0);
+ pci_disable_device(pdev);
+ driver_config->config_dev_cnt--;
+ driver_config->total_dev_cnt--;
vxge_debug_init(vdev->level_trace, "%s:%d Device unregistered",
__func__, __LINE__);
-
- vxge_hw_device_terminate(hldev);
-
- pci_disable_device(pdev);
- pci_release_region(pdev, 0);
- pci_set_drvdata(pdev, NULL);
vxge_debug_entryexit(vdev->level_trace, "%s:%d Exiting...", __func__,
__LINE__);
}
diff --git a/drivers/net/vxge/vxge-main.h b/drivers/net/vxge/vxge-main.h
index 5746fed..40474f0 100644
--- a/drivers/net/vxge/vxge-main.h
+++ b/drivers/net/vxge/vxge-main.h
@@ -59,11 +59,13 @@
#define VXGE_TTI_LTIMER_VAL 1000
#define VXGE_T1A_TTI_LTIMER_VAL 80
#define VXGE_TTI_RTIMER_VAL 0
+#define VXGE_TTI_RTIMER_ADAPT_VAL 10
#define VXGE_T1A_TTI_RTIMER_VAL 400
#define VXGE_RTI_BTIMER_VAL 250
#define VXGE_RTI_LTIMER_VAL 100
#define VXGE_RTI_RTIMER_VAL 0
-#define VXGE_FIFO_INDICATE_MAX_PKTS VXGE_DEF_FIFO_LENGTH
+#define VXGE_RTI_RTIMER_ADAPT_VAL 15
+#define VXGE_FIFO_INDICATE_MAX_PKTS VXGE_DEF_FIFO_LENGTH
#define VXGE_ISR_POLLING_CNT 8
#define VXGE_MAX_CONFIG_DEV 0xFF
#define VXGE_EXEC_MODE_DISABLE 0
@@ -107,6 +109,14 @@
#define RTI_T1A_RX_UFC_C 50
#define RTI_T1A_RX_UFC_D 60
+/*
+ * The moderation parameters keep the interrupt rate at about 3k per
+ * second for most, but not all, traffic. These are the maximum interrupt
+ * counts allowed per function with INTA, or per vector with MSI-X, in a
+ * 10 millisecond time period. Enabled only for Titan 1A.
+ */
+#define VXGE_T1A_MAX_INTERRUPT_COUNT 100
+#define VXGE_T1A_MAX_TX_INTERRUPT_COUNT 200
/* Milli secs timer period */
#define VXGE_TIMER_DELAY 10000
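
(A back-of-the-envelope check on the two counts above, not part of the patch: the adaptive helpers sample interrupt_count over HZ/100 jiffies, i.e. a 10 ms window, so the trip points work out to roughly 100 / 0.010 s = 10,000 RX interrupts per second and 200 / 0.010 s = 20,000 TX interrupts per second per vector, well above the ~3k/s target rate.)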
@@ -247,6 +257,11 @@
int tx_steering_type;
int indicate_max_pkts;
+ /* Adaptive interrupt moderation parameters used in T1A */
+ unsigned long interrupt_count;
+ unsigned long jiffies;
+
+ u32 tx_vector_no;
/* Tx stats */
struct vxge_fifo_stats stats;
} ____cacheline_aligned;
@@ -271,6 +286,10 @@
*/
int driver_id;
+ /* Adaptive interrupt moderation parameters used in T1A */
+ unsigned long interrupt_count;
+ unsigned long jiffies;
+
/* copy of the flag indicating whether rx_csum is to be used */
u32 rx_csum:1,
rx_hwts:1;
@@ -286,7 +305,7 @@
int vlan_tag_strip;
struct vlan_group *vlgrp;
- int rx_vector_no;
+ u32 rx_vector_no;
enum vxge_hw_status last_status;
/* Rx stats */
diff --git a/drivers/net/vxge/vxge-traffic.c b/drivers/net/vxge/vxge-traffic.c
index 4c10d6c..8674f33 100644
--- a/drivers/net/vxge/vxge-traffic.c
+++ b/drivers/net/vxge/vxge-traffic.c
@@ -218,6 +218,68 @@
return status;
}
+void vxge_hw_vpath_tti_ci_set(struct __vxge_hw_fifo *fifo)
+{
+ struct vxge_hw_vpath_reg __iomem *vp_reg;
+ struct vxge_hw_vp_config *config;
+ u64 val64;
+
+ if (fifo->config->enable != VXGE_HW_FIFO_ENABLE)
+ return;
+
+ vp_reg = fifo->vp_reg;
+ config = container_of(fifo->config, struct vxge_hw_vp_config, fifo);
+
+ if (config->tti.timer_ci_en != VXGE_HW_TIM_TIMER_CI_ENABLE) {
+ config->tti.timer_ci_en = VXGE_HW_TIM_TIMER_CI_ENABLE;
+ val64 = readq(&vp_reg->tim_cfg1_int_num[VXGE_HW_VPATH_INTR_TX]);
+ val64 |= VXGE_HW_TIM_CFG1_INT_NUM_TIMER_CI;
+ fifo->tim_tti_cfg1_saved = val64;
+ writeq(val64, &vp_reg->tim_cfg1_int_num[VXGE_HW_VPATH_INTR_TX]);
+ }
+}
+
+void vxge_hw_vpath_dynamic_rti_ci_set(struct __vxge_hw_ring *ring)
+{
+ u64 val64 = ring->tim_rti_cfg1_saved;
+
+ val64 |= VXGE_HW_TIM_CFG1_INT_NUM_TIMER_CI;
+ ring->tim_rti_cfg1_saved = val64;
+ writeq(val64, &ring->vp_reg->tim_cfg1_int_num[VXGE_HW_VPATH_INTR_RX]);
+}
+
+void vxge_hw_vpath_dynamic_tti_rtimer_set(struct __vxge_hw_fifo *fifo)
+{
+ u64 val64 = fifo->tim_tti_cfg3_saved;
+ u64 timer = (fifo->rtimer * 1000) / 272;
+
+ val64 &= ~VXGE_HW_TIM_CFG3_INT_NUM_RTIMER_VAL(0x3ffffff);
+ if (timer)
+ val64 |= VXGE_HW_TIM_CFG3_INT_NUM_RTIMER_VAL(timer) |
+ VXGE_HW_TIM_CFG3_INT_NUM_RTIMER_EVENT_SF(5);
+
+ writeq(val64, &fifo->vp_reg->tim_cfg3_int_num[VXGE_HW_VPATH_INTR_TX]);
+ /* tti_cfg3_saved is not updated again because it is
+ * initialized at one place only - init time.
+ */
+}
+
+void vxge_hw_vpath_dynamic_rti_rtimer_set(struct __vxge_hw_ring *ring)
+{
+ u64 val64 = ring->tim_rti_cfg3_saved;
+ u64 timer = (ring->rtimer * 1000) / 272;
+
+ val64 &= ~VXGE_HW_TIM_CFG3_INT_NUM_RTIMER_VAL(0x3ffffff);
+ if (timer)
+ val64 |= VXGE_HW_TIM_CFG3_INT_NUM_RTIMER_VAL(timer) |
+ VXGE_HW_TIM_CFG3_INT_NUM_RTIMER_EVENT_SF(4);
+
+ writeq(val64, &ring->vp_reg->tim_cfg3_int_num[VXGE_HW_VPATH_INTR_RX]);
+ /* rti_cfg3_saved is not updated again because it is
+ * initialized at one place only - init time.
+ */
+}
+
/**
* vxge_hw_channel_msix_mask - Mask MSIX Vector.
* @channel: Channel for rx or tx handle
@@ -254,6 +316,23 @@
}
/**
+ * vxge_hw_channel_msix_clear - Clear the MSIX Vector.
+ * @channel: Channel for rx or tx handle
+ * @msix_id: MSI ID
+ *
+ * The function clears the msix interrupt for the given msix_id
+ * when configured in MSI-X one-shot mode
+ *
+ * Returns: None
+ */
+void vxge_hw_channel_msix_clear(struct __vxge_hw_channel *channel, int msix_id)
+{
+ __vxge_hw_pio_mem_write32_upper(
+ (u32) vxge_bVALn(vxge_mBIT(msix_id >> 2), 0, 32),
+ &channel->common_reg->clr_msix_one_shot_vec[msix_id % 4]);
+}
+
+/**
* vxge_hw_device_set_intr_type - Updates the configuration
* with new interrupt type.
* @hldev: HW device handle.
@@ -2191,19 +2270,14 @@
if (vpath->hldev->config.intr_mode ==
VXGE_HW_INTR_MODE_MSIX_ONE_SHOT) {
__vxge_hw_pio_mem_write32_upper((u32)vxge_bVALn(
+ VXGE_HW_ONE_SHOT_VECT0_EN_ONE_SHOT_VECT0_EN,
+ 0, 32), &vp_reg->one_shot_vect0_en);
+ __vxge_hw_pio_mem_write32_upper((u32)vxge_bVALn(
VXGE_HW_ONE_SHOT_VECT1_EN_ONE_SHOT_VECT1_EN,
0, 32), &vp_reg->one_shot_vect1_en);
- }
-
- if (vpath->hldev->config.intr_mode ==
- VXGE_HW_INTR_MODE_MSIX_ONE_SHOT) {
__vxge_hw_pio_mem_write32_upper((u32)vxge_bVALn(
VXGE_HW_ONE_SHOT_VECT2_EN_ONE_SHOT_VECT2_EN,
0, 32), &vp_reg->one_shot_vect2_en);
-
- __vxge_hw_pio_mem_write32_upper((u32)vxge_bVALn(
- VXGE_HW_ONE_SHOT_VECT3_EN_ONE_SHOT_VECT3_EN,
- 0, 32), &vp_reg->one_shot_vect3_en);
}
}
@@ -2229,6 +2303,32 @@
}
/**
+ * vxge_hw_vpath_msix_clear - Clear MSIX Vector.
+ * @vp: Virtual Path handle.
+ * @msix_id: MSI ID
+ *
+ * The function clears the msix interrupt for the given msix_id
+ *
+ * Returns: None
+ */
+void vxge_hw_vpath_msix_clear(struct __vxge_hw_vpath_handle *vp, int msix_id)
+{
+ struct __vxge_hw_device *hldev = vp->vpath->hldev;
+
+ if ((hldev->config.intr_mode == VXGE_HW_INTR_MODE_MSIX_ONE_SHOT))
+ __vxge_hw_pio_mem_write32_upper(
+ (u32) vxge_bVALn(vxge_mBIT((msix_id >> 2)), 0, 32),
+ &hldev->common_reg->clr_msix_one_shot_vec[msix_id % 4]);
+ else
+ __vxge_hw_pio_mem_write32_upper(
+ (u32) vxge_bVALn(vxge_mBIT((msix_id >> 2)), 0, 32),
+ &hldev->common_reg->clear_msix_mask_vect[msix_id % 4]);
+}
+
+/**
* vxge_hw_vpath_msix_unmask - Unmask the MSIX Vector.
* @vp: Virtual Path handle.
* @msix_id: MSI ID
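
(Worked example for the rtimer conversion in the helpers above, assuming, as the divisor suggests, one hardware timer tick per 272 ns: VXGE_TTI_RTIMER_ADAPT_VAL = 10 yields (10 * 1000) / 272 = 36 ticks, and VXGE_RTI_RTIMER_ADAPT_VAL = 15 yields (15 * 1000) / 272 = 55 ticks; a value of 0 clears the restriction timer field entirely.)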
diff --git a/drivers/net/vxge/vxge-traffic.h b/drivers/net/vxge/vxge-traffic.h
index d48486d..9d9dfda 100644
--- a/drivers/net/vxge/vxge-traffic.h
+++ b/drivers/net/vxge/vxge-traffic.h
@@ -2142,6 +2142,10 @@
* Virtual Paths
*/
+void vxge_hw_vpath_dynamic_rti_rtimer_set(struct __vxge_hw_ring *ring);
+
+void vxge_hw_vpath_dynamic_tti_rtimer_set(struct __vxge_hw_fifo *fifo);
+
u32 vxge_hw_vpath_id(
struct __vxge_hw_vpath_handle *vpath_handle);
@@ -2245,6 +2249,8 @@
vxge_hw_vpath_msix_mask(struct __vxge_hw_vpath_handle *vpath_handle,
int msix_id);
+void vxge_hw_vpath_msix_clear(struct __vxge_hw_vpath_handle *vp, int msix_id);
+
void vxge_hw_device_flush_io(struct __vxge_hw_device *devh);
void
@@ -2270,6 +2276,9 @@
vxge_hw_channel_msix_unmask(struct __vxge_hw_channel *channelh, int msix_id);
void
+vxge_hw_channel_msix_clear(struct __vxge_hw_channel *channelh, int msix_id);
+
+void
vxge_hw_channel_dtr_try_complete(struct __vxge_hw_channel *channel,
void **dtrh);
@@ -2282,7 +2291,8 @@
int
vxge_hw_channel_dtr_count(struct __vxge_hw_channel *channel);
-void
-vxge_hw_vpath_tti_ci_set(struct __vxge_hw_device *hldev, u32 vp_id);
+void vxge_hw_vpath_tti_ci_set(struct __vxge_hw_fifo *fifo);
+
+void vxge_hw_vpath_dynamic_rti_ci_set(struct __vxge_hw_ring *ring);
#endif
diff --git a/drivers/net/vxge/vxge-version.h b/drivers/net/vxge/vxge-version.h
index ad2f99b..581e215 100644
--- a/drivers/net/vxge/vxge-version.h
+++ b/drivers/net/vxge/vxge-version.h
@@ -16,8 +16,8 @@
#define VXGE_VERSION_MAJOR "2"
#define VXGE_VERSION_MINOR "5"
-#define VXGE_VERSION_FIX "1"
-#define VXGE_VERSION_BUILD "22082"
+#define VXGE_VERSION_FIX "2"
+#define VXGE_VERSION_BUILD "22259"
#define VXGE_VERSION_FOR "k"
#define VXGE_FW_VER(maj, min, bld) (((maj) << 16) + ((min) << 8) + (bld))
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 359df04..9d339eb 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -103,6 +103,8 @@
#define AUDIT_BPRM_FCAPS 1321 /* Information about fcaps increasing perms */
#define AUDIT_CAPSET 1322 /* Record showing argument to sys_capset */
#define AUDIT_MMAP 1323 /* Record showing descriptor and flags in mmap */
+#define AUDIT_NETFILTER_PKT 1324 /* Packets traversing netfilter chains */
+#define AUDIT_NETFILTER_CFG 1325 /* Netfilter chain modifications */
#define AUDIT_AVC 1400 /* SE Linux avc denial or grant */
#define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */
diff --git a/include/linux/cpu_rmap.h b/include/linux/cpu_rmap.h
new file mode 100644
index 0000000..473771a
--- /dev/null
+++ b/include/linux/cpu_rmap.h
@@ -0,0 +1,73 @@
+/*
+ * cpu_rmap.h: CPU affinity reverse-map support
+ * Copyright 2011 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/cpumask.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+
+/**
+ * struct cpu_rmap - CPU affinity reverse-map
+ * @size: Number of objects to be reverse-mapped
+ * @used: Number of objects added
+ * @obj: Pointer to array of object pointers
+ * @near: For each CPU, the index and distance to the nearest object,
+ * based on affinity masks
+ */
+struct cpu_rmap {
+ u16 size, used;
+ void **obj;
+ struct {
+ u16 index;
+ u16 dist;
+ } near[0];
+};
+#define CPU_RMAP_DIST_INF 0xffff
+
+extern struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags);
+
+/**
+ * free_cpu_rmap - free CPU affinity reverse-map
+ * @rmap: Reverse-map allocated with alloc_cpu_rmap(), or %NULL
+ */
+static inline void free_cpu_rmap(struct cpu_rmap *rmap)
+{
+ kfree(rmap);
+}
+
+extern int cpu_rmap_add(struct cpu_rmap *rmap, void *obj);
+extern int cpu_rmap_update(struct cpu_rmap *rmap, u16 index,
+ const struct cpumask *affinity);
+
+static inline u16 cpu_rmap_lookup_index(struct cpu_rmap *rmap, unsigned int cpu)
+{
+ return rmap->near[cpu].index;
+}
+
+static inline void *cpu_rmap_lookup_obj(struct cpu_rmap *rmap, unsigned int cpu)
+{
+ return rmap->obj[rmap->near[cpu].index];
+}
+
+#ifdef CONFIG_GENERIC_HARDIRQS
+
+/**
+ * alloc_irq_cpu_rmap - allocate CPU affinity reverse-map for IRQs
+ * @size: Number of objects to be mapped
+ *
+ * Must be called in process context.
+ */
+static inline struct cpu_rmap *alloc_irq_cpu_rmap(unsigned int size)
+{
+ return alloc_cpu_rmap(size, GFP_KERNEL);
+}
+extern void free_irq_cpu_rmap(struct cpu_rmap *rmap);
+
+extern int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq);
+
+#endif
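
A minimal usage sketch for the API declared above. This is illustrative only: example_setup_rmap() and its arguments are hypothetical, and only the functions declared in cpu_rmap.h are assumed.

#include <linux/cpu_rmap.h>

/* Hypothetical driver helper: build a CPU-to-IRQ reverse map for
 * n_vectors MSI-X interrupts, e.g. to back dev->rx_cpu_rmap. */
static struct cpu_rmap *example_setup_rmap(const int *irqs,
					   unsigned int n_vectors)
{
	struct cpu_rmap *rmap;
	unsigned int i;

	rmap = alloc_irq_cpu_rmap(n_vectors);	/* GFP_KERNEL allocation */
	if (!rmap)
		return NULL;

	for (i = 0; i < n_vectors; i++) {
		/* Also hooks up affinity tracking, so the map follows
		 * later affinity changes (see the irq_affinity_notify
		 * hunk in interrupt.h below). */
		if (irq_cpu_rmap_add(rmap, irqs[i]) < 0) {
			free_irq_cpu_rmap(rmap);
			return NULL;
		}
	}
	return rmap;
}

A consumer can then resolve a CPU to the index of the nearest object with cpu_rmap_lookup_index(rmap, cpu), or to the object itself with cpu_rmap_lookup_obj().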
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 010e2d8..d638e85 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -279,8 +279,6 @@
DCCP_MAX_STATES
};
-#define DCCP_STATE_MASK 0x1f
-
enum {
DCCPF_OPEN = TCPF_ESTABLISHED,
DCCPF_REQUESTING = TCPF_SYN_SENT,
diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index 6485d2a..f4a2e6b 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -135,6 +135,7 @@
IFLA_VF_PORTS,
IFLA_PORT_SELF,
IFLA_AF_SPEC,
+ IFLA_GROUP, /* Group the device belongs to */
__IFLA_MAX
};
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 55e0d42..63c5ad7 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -14,6 +14,8 @@
#include <linux/smp.h>
#include <linux/percpu.h>
#include <linux/hrtimer.h>
+#include <linux/kref.h>
+#include <linux/workqueue.h>
#include <asm/atomic.h>
#include <asm/ptrace.h>
@@ -240,6 +242,35 @@
extern int irq_select_affinity(unsigned int irq);
extern int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m);
+
+/**
+ * struct irq_affinity_notify - context for notification of IRQ affinity changes
+ * @irq: Interrupt to which notification applies
+ * @kref: Reference count, for internal use
+ * @work: Work item, for internal use
+ * @notify: Function to be called on change. This will be
+ * called in process context.
+ * @release: Function to be called on release. This will be
+ * called in process context. Once registered, the
+ * structure must only be freed when this function is
+ * called or later.
+ */
+struct irq_affinity_notify {
+ unsigned int irq;
+ struct kref kref;
+ struct work_struct work;
+ void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask);
+ void (*release)(struct kref *ref);
+};
+
+extern int
+irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify);
+
+static inline void irq_run_affinity_notifiers(void)
+{
+ flush_scheduled_work();
+}
+
#else /* CONFIG_SMP */
static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m)
@@ -255,7 +286,7 @@
static inline int irq_select_affinity(unsigned int irq) { return 0; }
static inline int irq_set_affinity_hint(unsigned int irq,
- const struct cpumask *m)
+ const struct cpumask *m)
{
return -EINVAL;
}
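
A hedged sketch of how a driver might use the notifier added above; the example_* names are hypothetical, and only irq_set_affinity_notifier() and the struct fields documented in the hunk are assumed (the core owns @kref and @work, which are marked for internal use).

#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/slab.h>

struct example_ctx {
	struct irq_affinity_notify notify;
	/* ... driver state ... */
};

static void example_affinity_notify(struct irq_affinity_notify *notify,
				    const cpumask_t *mask)
{
	/* Runs in process context whenever the IRQ affinity changes. */
}

static void example_affinity_release(struct kref *ref)
{
	struct example_ctx *ctx =
		container_of(ref, struct example_ctx, notify.kref);

	kfree(ctx);	/* assumes ctx was kmalloc()ed by the driver */
}

static int example_register(struct example_ctx *ctx, unsigned int irq)
{
	ctx->notify.notify = example_affinity_notify;
	ctx->notify.release = example_affinity_release;
	return irq_set_affinity_notifier(irq, &ctx->notify);
}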
diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h
index 5f43a3b..4deb383 100644
--- a/include/linux/ip_vs.h
+++ b/include/linux/ip_vs.h
@@ -89,6 +89,14 @@
#define IP_VS_CONN_F_TEMPLATE 0x1000 /* template, not connection */
#define IP_VS_CONN_F_ONE_PACKET 0x2000 /* forward only one packet */
+#define IP_VS_CONN_F_BACKUP_MASK (IP_VS_CONN_F_FWD_MASK | \
+ IP_VS_CONN_F_NOOUTPUT | \
+ IP_VS_CONN_F_INACTIVE | \
+ IP_VS_CONN_F_SEQ_MASK | \
+ IP_VS_CONN_F_NO_CPORT | \
+ IP_VS_CONN_F_TEMPLATE \
+ )
+
/* Flags that are not sent to backup server start from bit 16 */
#define IP_VS_CONN_F_NFCT (1 << 16) /* use netfilter conntrack */
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index c1a95b7..bfef56d 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -8,6 +8,7 @@
* For now it's included from <linux/irq.h>
*/
+struct irq_affinity_notify;
struct proc_dir_entry;
struct timer_rand_state;
/**
@@ -24,6 +25,7 @@
* @last_unhandled: aging timer for unhandled count
* @irqs_unhandled: stats field for spurious unhandled interrupts
* @lock: locking for SMP
+ * @affinity_notify: context for notification of affinity changes
* @pending_mask: pending rebalanced interrupts
* @threads_active: number of irqaction threads currently running
* @wait_for_threads: wait queue for sync_irq to wait for threaded handlers
@@ -70,6 +72,7 @@
raw_spinlock_t lock;
#ifdef CONFIG_SMP
const struct cpumask *affinity_hint;
+ struct irq_affinity_notify *affinity_notify;
#ifdef CONFIG_GENERIC_PENDING_IRQ
cpumask_var_t pending_mask;
#endif
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d971346..8858422 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -75,6 +75,9 @@
#define NET_RX_SUCCESS 0 /* keep 'em coming, baby */
#define NET_RX_DROP 1 /* packet dropped */
+/* Initial net device group. All devices belong to group 0 by default. */
+#define INIT_NETDEV_GROUP 0
+
/*
* Transmit return codes: transmit return codes originate from three different
* namespaces:
@@ -551,14 +554,16 @@
#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + (_num * sizeof(u16)))
/*
- * The rps_dev_flow structure contains the mapping of a flow to a CPU and the
- * tail pointer for that CPU's input queue at the time of last enqueue.
+ * The rps_dev_flow structure contains the mapping of a flow to a CPU, the
+ * tail pointer for that CPU's input queue at the time of last enqueue, and
+ * a hardware filter index.
*/
struct rps_dev_flow {
u16 cpu;
- u16 fill;
+ u16 filter;
unsigned int last_qtail;
};
+#define RPS_NO_FILTER 0xffff
/*
* The rps_dev_flow_table structure contains a table of flow mappings.
@@ -608,6 +613,11 @@
extern struct rps_sock_flow_table __rcu *rps_sock_flow_table;
+#ifdef CONFIG_RFS_ACCEL
+extern bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
+ u32 flow_id, u16 filter_id);
+#endif
+
/* This structure contains an instance of an RX queue. */
struct netdev_rx_queue {
struct rps_map __rcu *rps_map;
@@ -643,6 +653,14 @@
(nr_cpu_ids * sizeof(struct xps_map *)))
#endif /* CONFIG_XPS */
+#define TC_MAX_QUEUE 16
+#define TC_BITMASK 15
+/* HW offloaded queuing disciplines txq count and offset maps */
+struct netdev_tc_txq {
+ u16 count;
+ u16 offset;
+};
+
/*
* This structure defines the management hooks for network devices.
* The following hooks can be defined; unless noted otherwise, they are
@@ -753,6 +771,18 @@
* int (*ndo_set_vf_port)(struct net_device *dev, int vf,
* struct nlattr *port[]);
* int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb);
+ * int (*ndo_setup_tc)(struct net_device *dev, u8 tc)
+ * Called to setup 'tc' number of traffic classes in the net device. This
+ * is always called from the stack with the rtnl lock held and netif tx
+ * queues stopped. This allows the netdevice to perform queue management
+ * safely.
+ *
+ * RFS acceleration.
+ * int (*ndo_rx_flow_steer)(struct net_device *dev, const struct sk_buff *skb,
+ * u16 rxq_index, u32 flow_id);
+ * Set hardware filter for RFS. rxq_index is the target queue index;
+ * flow_id is a flow ID to be passed to rps_may_expire_flow() later.
+ * Return the filter ID on success, or a negative error code.
*/
#define HAVE_NET_DEVICE_OPS
struct net_device_ops {
@@ -811,6 +841,7 @@
struct nlattr *port[]);
int (*ndo_get_vf_port)(struct net_device *dev,
int vf, struct sk_buff *skb);
+ int (*ndo_setup_tc)(struct net_device *dev, u8 tc);
#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
int (*ndo_fcoe_enable)(struct net_device *dev);
int (*ndo_fcoe_disable)(struct net_device *dev);
@@ -825,6 +856,12 @@
int (*ndo_fcoe_get_wwn)(struct net_device *dev,
u64 *wwn, int type);
#endif
+#ifdef CONFIG_RFS_ACCEL
+ int (*ndo_rx_flow_steer)(struct net_device *dev,
+ const struct sk_buff *skb,
+ u16 rxq_index,
+ u32 flow_id);
+#endif
};
/*
@@ -877,7 +914,11 @@
struct list_head unreg_list;
/* Net device features */
- unsigned long features;
+ u32 features;
+
+ /* VLAN feature mask */
+ u32 vlan_features;
+
#define NETIF_F_SG 1 /* Scatter/gather IO. */
#define NETIF_F_IP_CSUM 2 /* Can checksum TCP/UDP over IPv4. */
#define NETIF_F_NO_CSUM 4 /* Does not require checksum. F.e. loopack. */
@@ -1039,6 +1080,13 @@
/* Number of RX queues currently active in device */
unsigned int real_num_rx_queues;
+
+#ifdef CONFIG_RFS_ACCEL
+ /* CPU reverse-mapping for RX completion interrupts, indexed
+ * by RX queue number. Assigned by driver. This must only be
+ * set if the ndo_rx_flow_steer operation is defined. */
+ struct cpu_rmap *rx_cpu_rmap;
+#endif
#endif
rx_handler_func_t __rcu *rx_handler;
@@ -1132,9 +1180,6 @@
/* rtnetlink link ops */
const struct rtnl_link_ops *rtnl_link_ops;
- /* VLAN feature mask */
- unsigned long vlan_features;
-
/* for setting kernel sock attribute on TCP connection setup */
#define GSO_MAX_SIZE 65536
unsigned int gso_max_size;
@@ -1143,6 +1188,9 @@
/* Data Center Bridging netlink ops */
const struct dcbnl_rtnl_ops *dcbnl_ops;
#endif
+ u8 num_tc;
+ struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE];
+ u8 prio_tc_map[TC_BITMASK + 1];
#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
/* max exchange id for FCoE LRO by ddp */
@@ -1153,12 +1201,66 @@
/* phy device may attach itself for hardware timestamping */
struct phy_device *phydev;
+
+ /* group the device belongs to */
+ int group;
};
#define to_net_dev(d) container_of(d, struct net_device, dev)
#define NETDEV_ALIGN 32
static inline
+int netdev_get_prio_tc_map(const struct net_device *dev, u32 prio)
+{
+ return dev->prio_tc_map[prio & TC_BITMASK];
+}
+
+static inline
+int netdev_set_prio_tc_map(struct net_device *dev, u8 prio, u8 tc)
+{
+ if (tc >= dev->num_tc)
+ return -EINVAL;
+
+ dev->prio_tc_map[prio & TC_BITMASK] = tc & TC_BITMASK;
+ return 0;
+}
+
+static inline
+void netdev_reset_tc(struct net_device *dev)
+{
+ dev->num_tc = 0;
+ memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq));
+ memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map));
+}
+
+static inline
+int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset)
+{
+ if (tc >= dev->num_tc)
+ return -EINVAL;
+
+ dev->tc_to_txq[tc].count = count;
+ dev->tc_to_txq[tc].offset = offset;
+ return 0;
+}
+
+static inline
+int netdev_set_num_tc(struct net_device *dev, u8 num_tc)
+{
+ if (num_tc > TC_MAX_QUEUE)
+ return -EINVAL;
+
+ dev->num_tc = num_tc;
+ return 0;
+}
+
+static inline
+int netdev_get_num_tc(struct net_device *dev)
+{
+ return dev->num_tc;
+}
+
+static inline
struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev,
unsigned int index)
{
@@ -1300,7 +1402,7 @@
struct packet_type *,
struct net_device *);
struct sk_buff *(*gso_segment)(struct sk_buff *skb,
- int features);
+ u32 features);
int (*gso_send_check)(struct sk_buff *skb);
struct sk_buff **(*gro_receive)(struct sk_buff **head,
struct sk_buff *skb);
@@ -1844,6 +1946,7 @@
extern int dev_change_net_namespace(struct net_device *,
struct net *, const char *);
extern int dev_set_mtu(struct net_device *, int);
+extern void dev_set_group(struct net_device *, int);
extern int dev_set_mac_address(struct net_device *,
struct sockaddr *);
extern int dev_hard_start_xmit(struct sk_buff *skb,
@@ -2268,7 +2371,7 @@
extern int weight_p;
extern int netdev_set_master(struct net_device *dev, struct net_device *master);
extern int skb_checksum_help(struct sk_buff *skb);
-extern struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features);
+extern struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features);
#ifdef CONFIG_BUG
extern void netdev_rx_csum_fault(struct net_device *dev);
#else
@@ -2295,22 +2398,21 @@
extern void linkwatch_run_queue(void);
-unsigned long netdev_increment_features(unsigned long all, unsigned long one,
- unsigned long mask);
-unsigned long netdev_fix_features(unsigned long features, const char *name);
+u32 netdev_increment_features(u32 all, u32 one, u32 mask);
+u32 netdev_fix_features(struct net_device *dev, u32 features);
void netif_stacked_transfer_operstate(const struct net_device *rootdev,
struct net_device *dev);
-int netif_skb_features(struct sk_buff *skb);
+u32 netif_skb_features(struct sk_buff *skb);
-static inline int net_gso_ok(int features, int gso_type)
+static inline int net_gso_ok(u32 features, int gso_type)
{
int feature = gso_type << NETIF_F_GSO_SHIFT;
return (features & feature) == feature;
}
-static inline int skb_gso_ok(struct sk_buff *skb, int features)
+static inline int skb_gso_ok(struct sk_buff *skb, u32 features)
{
return net_gso_ok(features, skb_shinfo(skb)->gso_type) &&
(!skb_has_frag_list(skb) || (features & NETIF_F_FRAGLIST));
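
A brief sketch of how a multiqueue driver might use the new traffic-class helpers from this file; the function and its num_txq parameter are hypothetical, and only netdev_reset_tc(), netdev_set_num_tc(), netdev_set_tc_queue() and netdev_set_prio_tc_map() are assumed.

#include <linux/netdevice.h>

/* Hypothetical ndo_setup_tc-style helper: give each of 'tc' classes an
 * equal slice of 'num_txq' TX queues and map priorities 1:1 to classes. */
static int example_setup_tc(struct net_device *dev, u8 tc, u16 num_txq)
{
	u16 count;
	u8 i;
	int err;

	if (!tc) {
		netdev_reset_tc(dev);
		return 0;
	}

	err = netdev_set_num_tc(dev, tc);
	if (err)
		return err;

	count = num_txq / tc;
	for (i = 0; i < tc; i++) {
		err = netdev_set_tc_queue(dev, i, count, i * count);
		if (err)
			return err;
		/* Priorities map 1:1 onto classes here. */
		netdev_set_prio_tc_map(dev, i, i);
	}
	return 0;
}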
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 1893837..eeec00a 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -24,16 +24,20 @@
#define NF_MAX_VERDICT NF_STOP
/* we overload the higher bits for encoding auxiliary data such as the queue
- * number. Not nice, but better than additional function arguments. */
-#define NF_VERDICT_MASK 0x0000ffff
-#define NF_VERDICT_BITS 16
+ * number or errno values. Not nice, but better than additional function
+ * arguments. */
+#define NF_VERDICT_MASK 0x000000ff
+/* extra verdict flags have mask 0x0000ff00 */
+#define NF_VERDICT_FLAG_QUEUE_BYPASS 0x00008000
+
+/* queue number (NF_QUEUE) or errno (NF_DROP) */
#define NF_VERDICT_QMASK 0xffff0000
#define NF_VERDICT_QBITS 16
-#define NF_QUEUE_NR(x) ((((x) << NF_VERDICT_BITS) & NF_VERDICT_QMASK) | NF_QUEUE)
+#define NF_QUEUE_NR(x) ((((x) << 16) & NF_VERDICT_QMASK) | NF_QUEUE)
-#define NF_DROP_ERR(x) (((-x) << NF_VERDICT_BITS) | NF_DROP)
+#define NF_DROP_ERR(x) (((-x) << 16) | NF_DROP)
/* only for userspace compatibility */
#ifndef __KERNEL__
@@ -41,6 +45,9 @@
<= 0x2000 is used for protocol-flags. */
#define NFC_UNKNOWN 0x4000
#define NFC_ALTERED 0x8000
+
+/* NF_VERDICT_BITS should be 8 now, but userspace might break if this changes */
+#define NF_VERDICT_BITS 16
#endif
enum nf_inet_hooks {
@@ -72,6 +79,10 @@
#ifdef __KERNEL__
#ifdef CONFIG_NETFILTER
+static inline int NF_DROP_GETERR(int verdict)
+{
+ return -(verdict >> NF_VERDICT_QBITS);
+}
static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1,
const union nf_inet_addr *a2)
@@ -267,7 +278,7 @@
int route_key_size;
};
-extern const struct nf_afinfo *nf_afinfo[NFPROTO_NUMPROTO];
+extern const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO];
static inline const struct nf_afinfo *nf_get_afinfo(unsigned short family)
{
return rcu_dereference(nf_afinfo[family]);
@@ -357,9 +368,9 @@
#endif /*CONFIG_NETFILTER*/
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
-extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
+extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu;
extern void nf_ct_attach(struct sk_buff *, struct sk_buff *);
-extern void (*nf_ct_destroy)(struct nf_conntrack *);
+extern void (*nf_ct_destroy)(struct nf_conntrack *) __rcu;
#else
static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
#endif
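
To make the re-encoded verdict layout concrete, here is a small worked example (a kernel-side sketch, not part of the patch; NF_DROP, NF_QUEUE and BUG_ON() come from the standard headers). The verdict proper now lives in the low byte, extra flags in the next byte, and the queue number or errno in the high 16 bits:

#include <linux/netfilter.h>
#include <linux/errno.h>
#include <linux/bug.h>

static void example_verdict_layout(void)
{
	unsigned int queue_verdict = NF_QUEUE_NR(5);		/* (5 << 16) | NF_QUEUE */
	unsigned int drop_verdict = NF_DROP_ERR(-ENOMEM);	/* (ENOMEM << 16) | NF_DROP */

	/* Low byte: the verdict itself. */
	BUG_ON((queue_verdict & NF_VERDICT_MASK) != NF_QUEUE);

	/* High 16 bits: queue number (for NF_QUEUE) or errno (for NF_DROP). */
	BUG_ON(((queue_verdict & NF_VERDICT_QMASK) >> NF_VERDICT_QBITS) != 5);

	/* NF_DROP_GETERR() recovers the negative errno packed by NF_DROP_ERR(). */
	BUG_ON(NF_DROP_GETERR(drop_verdict) != -ENOMEM);
}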
diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index 9d40eff..89c0d1e 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -9,6 +9,7 @@
header-y += nfnetlink_log.h
header-y += nfnetlink_queue.h
header-y += x_tables.h
+header-y += xt_AUDIT.h
header-y += xt_CHECKSUM.h
header-y += xt_CLASSIFY.h
header-y += xt_CONNMARK.h
@@ -55,6 +56,7 @@
header-y += xt_realm.h
header-y += xt_recent.h
header-y += xt_sctp.h
+header-y += xt_socket.h
header-y += xt_state.h
header-y += xt_statistic.h
header-y += xt_string.h
diff --git a/include/linux/netfilter/nf_conntrack_snmp.h b/include/linux/netfilter/nf_conntrack_snmp.h
new file mode 100644
index 0000000..064bc63
--- /dev/null
+++ b/include/linux/netfilter/nf_conntrack_snmp.h
@@ -0,0 +1,9 @@
+#ifndef _NF_CONNTRACK_SNMP_H
+#define _NF_CONNTRACK_SNMP_H
+
+extern int (*nf_nat_snmp_hook)(struct sk_buff *skb,
+ unsigned int protoff,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo);
+
+#endif /* _NF_CONNTRACK_SNMP_H */
diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h
index 19711e3..debf1ae 100644
--- a/include/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/linux/netfilter/nfnetlink_conntrack.h
@@ -42,6 +42,7 @@
CTA_SECMARK, /* obsolete */
CTA_ZONE,
CTA_SECCTX,
+ CTA_TIMESTAMP,
__CTA_MAX
};
#define CTA_MAX (__CTA_MAX - 1)
@@ -127,6 +128,14 @@
};
#define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1)
+enum ctattr_tstamp {
+ CTA_TIMESTAMP_UNSPEC,
+ CTA_TIMESTAMP_START,
+ CTA_TIMESTAMP_STOP,
+ __CTA_TIMESTAMP_MAX
+};
+#define CTA_TIMESTAMP_MAX (__CTA_TIMESTAMP_MAX - 1)
+
enum ctattr_nat {
CTA_NAT_UNSPEC,
CTA_NAT_MINIP,
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 6712e71..3721952 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -611,8 +611,9 @@
extern void xt_compat_lock(u_int8_t af);
extern void xt_compat_unlock(u_int8_t af);
-extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta);
+extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta);
extern void xt_compat_flush_offsets(u_int8_t af);
+extern void xt_compat_init_offsets(u_int8_t af, unsigned int number);
extern int xt_compat_calc_jump(u_int8_t af, unsigned int offset);
extern int xt_compat_match_offset(const struct xt_match *match);
diff --git a/include/linux/netfilter/xt_AUDIT.h b/include/linux/netfilter/xt_AUDIT.h
new file mode 100644
index 0000000..38751d2
--- /dev/null
+++ b/include/linux/netfilter/xt_AUDIT.h
@@ -0,0 +1,30 @@
+/*
+ * Header file for iptables xt_AUDIT target
+ *
+ * (C) 2010-2011 Thomas Graf <tgraf@redhat.com>
+ * (C) 2010-2011 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _XT_AUDIT_TARGET_H
+#define _XT_AUDIT_TARGET_H
+
+#include <linux/types.h>
+
+enum {
+ XT_AUDIT_TYPE_ACCEPT = 0,
+ XT_AUDIT_TYPE_DROP,
+ XT_AUDIT_TYPE_REJECT,
+ __XT_AUDIT_TYPE_MAX,
+};
+
+#define XT_AUDIT_TYPE_MAX (__XT_AUDIT_TYPE_MAX - 1)
+
+struct xt_audit_info {
+ __u8 type; /* XT_AUDIT_TYPE_* */
+};
+
+#endif /* _XT_AUDIT_TARGET_H */
diff --git a/include/linux/netfilter/xt_CT.h b/include/linux/netfilter/xt_CT.h
index 1b56410..b56e768 100644
--- a/include/linux/netfilter/xt_CT.h
+++ b/include/linux/netfilter/xt_CT.h
@@ -1,14 +1,16 @@
#ifndef _XT_CT_H
#define _XT_CT_H
+#include <linux/types.h>
+
#define XT_CT_NOTRACK 0x1
struct xt_ct_target_info {
- u_int16_t flags;
- u_int16_t zone;
- u_int32_t ct_events;
- u_int32_t exp_events;
- char helper[16];
+ __u16 flags;
+ __u16 zone;
+ __u32 ct_events;
+ __u32 exp_events;
+ char helper[16];
/* Used internally by the kernel */
struct nf_conn *ct __attribute__((aligned(8)));
diff --git a/include/linux/netfilter/xt_NFQUEUE.h b/include/linux/netfilter/xt_NFQUEUE.h
index 2584f4a..9eafdbb 100644
--- a/include/linux/netfilter/xt_NFQUEUE.h
+++ b/include/linux/netfilter/xt_NFQUEUE.h
@@ -20,4 +20,10 @@
__u16 queues_total;
};
+struct xt_NFQ_info_v2 {
+ __u16 queuenum;
+ __u16 queues_total;
+ __u16 bypass;
+};
+
#endif /* _XT_NFQ_TARGET_H */
diff --git a/include/linux/netfilter/xt_TCPOPTSTRIP.h b/include/linux/netfilter/xt_TCPOPTSTRIP.h
index 2db5432..7157318 100644
--- a/include/linux/netfilter/xt_TCPOPTSTRIP.h
+++ b/include/linux/netfilter/xt_TCPOPTSTRIP.h
@@ -1,13 +1,15 @@
#ifndef _XT_TCPOPTSTRIP_H
#define _XT_TCPOPTSTRIP_H
+#include <linux/types.h>
+
#define tcpoptstrip_set_bit(bmap, idx) \
(bmap[(idx) >> 5] |= 1U << (idx & 31))
#define tcpoptstrip_test_bit(bmap, idx) \
(((1U << (idx & 31)) & bmap[(idx) >> 5]) != 0)
struct xt_tcpoptstrip_target_info {
- u_int32_t strip_bmap[8];
+ __u32 strip_bmap[8];
};
#endif /* _XT_TCPOPTSTRIP_H */
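
A tiny illustration of the bitmap helpers above (hedged sketch: option kind 8, the TCP timestamp option, is just an example value):

#include <linux/netfilter/xt_TCPOPTSTRIP.h>

static int example_strip_bitmap(void)
{
	struct xt_tcpoptstrip_target_info info = { .strip_bmap = { 0 } };

	/* Mark TCP option kind 8 (timestamps) for stripping:
	 * word 8 >> 5 = 0, bit 8 & 31 = 8. */
	tcpoptstrip_set_bit(info.strip_bmap, 8);

	return tcpoptstrip_test_bit(info.strip_bmap, 8);	/* 1 */
}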
diff --git a/include/linux/netfilter/xt_TPROXY.h b/include/linux/netfilter/xt_TPROXY.h
index 3f3d693..902043c 100644
--- a/include/linux/netfilter/xt_TPROXY.h
+++ b/include/linux/netfilter/xt_TPROXY.h
@@ -1,19 +1,21 @@
#ifndef _XT_TPROXY_H
#define _XT_TPROXY_H
+#include <linux/types.h>
+
/* TPROXY target is capable of marking the packet to perform
* redirection. We can get rid of that whenever we get support for
* multiple targets in the same rule. */
struct xt_tproxy_target_info {
- u_int32_t mark_mask;
- u_int32_t mark_value;
+ __u32 mark_mask;
+ __u32 mark_value;
__be32 laddr;
__be16 lport;
};
struct xt_tproxy_target_info_v1 {
- u_int32_t mark_mask;
- u_int32_t mark_value;
+ __u32 mark_mask;
+ __u32 mark_value;
union nf_inet_addr laddr;
__be16 lport;
};
diff --git a/include/linux/netfilter/xt_cluster.h b/include/linux/netfilter/xt_cluster.h
index 8866826..9b883c8 100644
--- a/include/linux/netfilter/xt_cluster.h
+++ b/include/linux/netfilter/xt_cluster.h
@@ -1,15 +1,17 @@
#ifndef _XT_CLUSTER_MATCH_H
#define _XT_CLUSTER_MATCH_H
+#include <linux/types.h>
+
enum xt_cluster_flags {
XT_CLUSTER_F_INV = (1 << 0)
};
struct xt_cluster_match_info {
- u_int32_t total_nodes;
- u_int32_t node_mask;
- u_int32_t hash_seed;
- u_int32_t flags;
+ __u32 total_nodes;
+ __u32 node_mask;
+ __u32 hash_seed;
+ __u32 flags;
};
#define XT_CLUSTER_NODES_MAX 32
diff --git a/include/linux/netfilter/xt_comment.h b/include/linux/netfilter/xt_comment.h
index eacfedc..0ea5e79 100644
--- a/include/linux/netfilter/xt_comment.h
+++ b/include/linux/netfilter/xt_comment.h
@@ -4,7 +4,7 @@
#define XT_MAX_COMMENT_LEN 256
struct xt_comment_info {
- unsigned char comment[XT_MAX_COMMENT_LEN];
+ char comment[XT_MAX_COMMENT_LEN];
};
#endif /* XT_COMMENT_H */
diff --git a/include/linux/netfilter/xt_connlimit.h b/include/linux/netfilter/xt_connlimit.h
index 7e3284b..0ca66e9 100644
--- a/include/linux/netfilter/xt_connlimit.h
+++ b/include/linux/netfilter/xt_connlimit.h
@@ -1,8 +1,15 @@
#ifndef _XT_CONNLIMIT_H
#define _XT_CONNLIMIT_H
+#include <linux/types.h>
+
struct xt_connlimit_data;
+enum {
+ XT_CONNLIMIT_INVERT = 1 << 0,
+ XT_CONNLIMIT_DADDR = 1 << 1,
+};
+
struct xt_connlimit_info {
union {
union nf_inet_addr mask;
@@ -13,7 +20,14 @@
};
#endif
};
- unsigned int limit, inverse;
+ unsigned int limit;
+ union {
+ /* revision 0 */
+ unsigned int inverse;
+
+ /* revision 1 */
+ __u32 flags;
+ };
/* Used internally by the kernel */
struct xt_connlimit_data *data __attribute__((aligned(8)));
diff --git a/include/linux/netfilter/xt_conntrack.h b/include/linux/netfilter/xt_conntrack.h
index 54f47a2..74b904d 100644
--- a/include/linux/netfilter/xt_conntrack.h
+++ b/include/linux/netfilter/xt_conntrack.h
@@ -58,4 +58,19 @@
__u16 state_mask, status_mask;
};
+struct xt_conntrack_mtinfo3 {
+ union nf_inet_addr origsrc_addr, origsrc_mask;
+ union nf_inet_addr origdst_addr, origdst_mask;
+ union nf_inet_addr replsrc_addr, replsrc_mask;
+ union nf_inet_addr repldst_addr, repldst_mask;
+ __u32 expires_min, expires_max;
+ __u16 l4proto;
+ __u16 origsrc_port, origdst_port;
+ __u16 replsrc_port, repldst_port;
+ __u16 match_flags, invert_flags;
+ __u16 state_mask, status_mask;
+ __u16 origsrc_port_high, origdst_port_high;
+ __u16 replsrc_port_high, repldst_port_high;
+};
+
#endif /*_XT_CONNTRACK_H*/
diff --git a/include/linux/netfilter/xt_quota.h b/include/linux/netfilter/xt_quota.h
index b0d28c6..ca6e03e 100644
--- a/include/linux/netfilter/xt_quota.h
+++ b/include/linux/netfilter/xt_quota.h
@@ -1,6 +1,8 @@
#ifndef _XT_QUOTA_H
#define _XT_QUOTA_H
+#include <linux/types.h>
+
enum xt_quota_flags {
XT_QUOTA_INVERT = 0x1,
};
@@ -9,9 +11,9 @@
struct xt_quota_priv;
struct xt_quota_info {
- u_int32_t flags;
- u_int32_t pad;
- aligned_u64 quota;
+ __u32 flags;
+ __u32 pad;
+ aligned_u64 quota;
/* Used internally by the kernel */
struct xt_quota_priv *master;
diff --git a/include/linux/netfilter/xt_socket.h b/include/linux/netfilter/xt_socket.h
index 6f475b8..26d7217 100644
--- a/include/linux/netfilter/xt_socket.h
+++ b/include/linux/netfilter/xt_socket.h
@@ -1,6 +1,8 @@
#ifndef _XT_SOCKET_H
#define _XT_SOCKET_H
+#include <linux/types.h>
+
enum {
XT_SOCKET_TRANSPARENT = 1 << 0,
};
diff --git a/include/linux/netfilter/xt_time.h b/include/linux/netfilter/xt_time.h
index 14b6df4..7c37fac 100644
--- a/include/linux/netfilter/xt_time.h
+++ b/include/linux/netfilter/xt_time.h
@@ -1,14 +1,16 @@
#ifndef _XT_TIME_H
#define _XT_TIME_H 1
+#include <linux/types.h>
+
struct xt_time_info {
- u_int32_t date_start;
- u_int32_t date_stop;
- u_int32_t daytime_start;
- u_int32_t daytime_stop;
- u_int32_t monthdays_match;
- u_int8_t weekdays_match;
- u_int8_t flags;
+ __u32 date_start;
+ __u32 date_stop;
+ __u32 daytime_start;
+ __u32 daytime_stop;
+ __u32 monthdays_match;
+ __u8 weekdays_match;
+ __u8 flags;
};
enum {
diff --git a/include/linux/netfilter/xt_u32.h b/include/linux/netfilter/xt_u32.h
index 9947f56..04d1bfe 100644
--- a/include/linux/netfilter/xt_u32.h
+++ b/include/linux/netfilter/xt_u32.h
@@ -1,6 +1,8 @@
#ifndef _XT_U32_H
#define _XT_U32_H 1
+#include <linux/types.h>
+
enum xt_u32_ops {
XT_U32_AND,
XT_U32_LEFTSH,
@@ -9,13 +11,13 @@
};
struct xt_u32_location_element {
- u_int32_t number;
- u_int8_t nextop;
+ __u32 number;
+ __u8 nextop;
};
struct xt_u32_value_element {
- u_int32_t min;
- u_int32_t max;
+ __u32 min;
+ __u32 max;
};
/*
@@ -27,14 +29,14 @@
struct xt_u32_test {
struct xt_u32_location_element location[XT_U32_MAXSIZE+1];
struct xt_u32_value_element value[XT_U32_MAXSIZE+1];
- u_int8_t nnums;
- u_int8_t nvalues;
+ __u8 nnums;
+ __u8 nvalues;
};
struct xt_u32 {
struct xt_u32_test tests[XT_U32_MAXSIZE+1];
- u_int8_t ntests;
- u_int8_t invert;
+ __u8 ntests;
+ __u8 invert;
};
#endif /* _XT_U32_H */
diff --git a/include/linux/netfilter_bridge/ebt_802_3.h b/include/linux/netfilter_bridge/ebt_802_3.h
index c73ef0b..be5be15 100644
--- a/include/linux/netfilter_bridge/ebt_802_3.h
+++ b/include/linux/netfilter_bridge/ebt_802_3.h
@@ -1,6 +1,8 @@
#ifndef __LINUX_BRIDGE_EBT_802_3_H
#define __LINUX_BRIDGE_EBT_802_3_H
+#include <linux/types.h>
+
#define EBT_802_3_SAP 0x01
#define EBT_802_3_TYPE 0x02
@@ -24,24 +26,24 @@
/* ui has one byte ctrl, ni has two */
struct hdr_ui {
- uint8_t dsap;
- uint8_t ssap;
- uint8_t ctrl;
- uint8_t orig[3];
+ __u8 dsap;
+ __u8 ssap;
+ __u8 ctrl;
+ __u8 orig[3];
__be16 type;
};
struct hdr_ni {
- uint8_t dsap;
- uint8_t ssap;
+ __u8 dsap;
+ __u8 ssap;
__be16 ctrl;
- uint8_t orig[3];
+ __u8 orig[3];
__be16 type;
};
struct ebt_802_3_hdr {
- uint8_t daddr[6];
- uint8_t saddr[6];
+ __u8 daddr[6];
+ __u8 saddr[6];
__be16 len;
union {
struct hdr_ui ui;
@@ -59,10 +61,10 @@
#endif
struct ebt_802_3_info {
- uint8_t sap;
+ __u8 sap;
__be16 type;
- uint8_t bitmask;
- uint8_t invflags;
+ __u8 bitmask;
+ __u8 invflags;
};
#endif
diff --git a/include/linux/netfilter_bridge/ebt_among.h b/include/linux/netfilter_bridge/ebt_among.h
index 0009558..bd4e3ad 100644
--- a/include/linux/netfilter_bridge/ebt_among.h
+++ b/include/linux/netfilter_bridge/ebt_among.h
@@ -1,6 +1,8 @@
#ifndef __LINUX_BRIDGE_EBT_AMONG_H
#define __LINUX_BRIDGE_EBT_AMONG_H
+#include <linux/types.h>
+
#define EBT_AMONG_DST 0x01
#define EBT_AMONG_SRC 0x02
@@ -30,7 +32,7 @@
*/
struct ebt_mac_wormhash_tuple {
- uint32_t cmp[2];
+ __u32 cmp[2];
__be32 ip;
};
diff --git a/include/linux/netfilter_bridge/ebt_arp.h b/include/linux/netfilter_bridge/ebt_arp.h
index cbf4843..522f3e4 100644
--- a/include/linux/netfilter_bridge/ebt_arp.h
+++ b/include/linux/netfilter_bridge/ebt_arp.h
@@ -1,6 +1,8 @@
#ifndef __LINUX_BRIDGE_EBT_ARP_H
#define __LINUX_BRIDGE_EBT_ARP_H
+#include <linux/types.h>
+
#define EBT_ARP_OPCODE 0x01
#define EBT_ARP_HTYPE 0x02
#define EBT_ARP_PTYPE 0x04
@@ -27,8 +29,8 @@
unsigned char smmsk[ETH_ALEN];
unsigned char dmaddr[ETH_ALEN];
unsigned char dmmsk[ETH_ALEN];
- uint8_t bitmask;
- uint8_t invflags;
+ __u8 bitmask;
+ __u8 invflags;
};
#endif
diff --git a/include/linux/netfilter_bridge/ebt_ip.h b/include/linux/netfilter_bridge/ebt_ip.h
index 6a708fb..c4bbc41 100644
--- a/include/linux/netfilter_bridge/ebt_ip.h
+++ b/include/linux/netfilter_bridge/ebt_ip.h
@@ -15,6 +15,8 @@
#ifndef __LINUX_BRIDGE_EBT_IP_H
#define __LINUX_BRIDGE_EBT_IP_H
+#include <linux/types.h>
+
#define EBT_IP_SOURCE 0x01
#define EBT_IP_DEST 0x02
#define EBT_IP_TOS 0x04
@@ -31,12 +33,12 @@
__be32 daddr;
__be32 smsk;
__be32 dmsk;
- uint8_t tos;
- uint8_t protocol;
- uint8_t bitmask;
- uint8_t invflags;
- uint16_t sport[2];
- uint16_t dport[2];
+ __u8 tos;
+ __u8 protocol;
+ __u8 bitmask;
+ __u8 invflags;
+ __u16 sport[2];
+ __u16 dport[2];
};
#endif
diff --git a/include/linux/netfilter_bridge/ebt_ip6.h b/include/linux/netfilter_bridge/ebt_ip6.h
index e5de987..42b8896 100644
--- a/include/linux/netfilter_bridge/ebt_ip6.h
+++ b/include/linux/netfilter_bridge/ebt_ip6.h
@@ -12,14 +12,19 @@
#ifndef __LINUX_BRIDGE_EBT_IP6_H
#define __LINUX_BRIDGE_EBT_IP6_H
+#include <linux/types.h>
+
#define EBT_IP6_SOURCE 0x01
#define EBT_IP6_DEST 0x02
#define EBT_IP6_TCLASS 0x04
#define EBT_IP6_PROTO 0x08
#define EBT_IP6_SPORT 0x10
#define EBT_IP6_DPORT 0x20
+#define EBT_IP6_ICMP6 0x40
+
#define EBT_IP6_MASK (EBT_IP6_SOURCE | EBT_IP6_DEST | EBT_IP6_TCLASS |\
- EBT_IP6_PROTO | EBT_IP6_SPORT | EBT_IP6_DPORT)
+ EBT_IP6_PROTO | EBT_IP6_SPORT | EBT_IP6_DPORT | \
+ EBT_IP6_ICMP6)
#define EBT_IP6_MATCH "ip6"
/* the same values are used for the invflags */
@@ -28,12 +33,18 @@
struct in6_addr daddr;
struct in6_addr smsk;
struct in6_addr dmsk;
- uint8_t tclass;
- uint8_t protocol;
- uint8_t bitmask;
- uint8_t invflags;
- uint16_t sport[2];
- uint16_t dport[2];
+ __u8 tclass;
+ __u8 protocol;
+ __u8 bitmask;
+ __u8 invflags;
+ union {
+ __u16 sport[2];
+ __u8 icmpv6_type[2];
+ };
+ union {
+ __u16 dport[2];
+ __u8 icmpv6_code[2];
+ };
};
#endif
diff --git a/include/linux/netfilter_bridge/ebt_limit.h b/include/linux/netfilter_bridge/ebt_limit.h
index 4bf76b7..66d80b3 100644
--- a/include/linux/netfilter_bridge/ebt_limit.h
+++ b/include/linux/netfilter_bridge/ebt_limit.h
@@ -1,6 +1,8 @@
#ifndef __LINUX_BRIDGE_EBT_LIMIT_H
#define __LINUX_BRIDGE_EBT_LIMIT_H
+#include <linux/types.h>
+
#define EBT_LIMIT_MATCH "limit"
/* timings are in milliseconds. */
@@ -10,13 +12,13 @@
seconds, or one every 59 hours. */
struct ebt_limit_info {
- u_int32_t avg; /* Average secs between packets * scale */
- u_int32_t burst; /* Period multiplier for upper limit. */
+ __u32 avg; /* Average secs between packets * scale */
+ __u32 burst; /* Period multiplier for upper limit. */
/* Used internally by the kernel */
unsigned long prev;
- u_int32_t credit;
- u_int32_t credit_cap, cost;
+ __u32 credit;
+ __u32 credit_cap, cost;
};
#endif
diff --git a/include/linux/netfilter_bridge/ebt_log.h b/include/linux/netfilter_bridge/ebt_log.h
index cc2cdfb..7e7f1d1 100644
--- a/include/linux/netfilter_bridge/ebt_log.h
+++ b/include/linux/netfilter_bridge/ebt_log.h
@@ -1,6 +1,8 @@
#ifndef __LINUX_BRIDGE_EBT_LOG_H
#define __LINUX_BRIDGE_EBT_LOG_H
+#include <linux/types.h>
+
#define EBT_LOG_IP 0x01 /* if the frame is made by ip, log the ip information */
#define EBT_LOG_ARP 0x02
#define EBT_LOG_NFLOG 0x04
@@ -10,9 +12,9 @@
#define EBT_LOG_WATCHER "log"
struct ebt_log_info {
- uint8_t loglevel;
- uint8_t prefix[EBT_LOG_PREFIX_SIZE];
- uint32_t bitmask;
+ __u8 loglevel;
+ __u8 prefix[EBT_LOG_PREFIX_SIZE];
+ __u32 bitmask;
};
#endif
diff --git a/include/linux/netfilter_bridge/ebt_mark_m.h b/include/linux/netfilter_bridge/ebt_mark_m.h
index 9ceb10e..410f9e5 100644
--- a/include/linux/netfilter_bridge/ebt_mark_m.h
+++ b/include/linux/netfilter_bridge/ebt_mark_m.h
@@ -1,13 +1,15 @@
#ifndef __LINUX_BRIDGE_EBT_MARK_M_H
#define __LINUX_BRIDGE_EBT_MARK_M_H
+#include <linux/types.h>
+
#define EBT_MARK_AND 0x01
#define EBT_MARK_OR 0x02
#define EBT_MARK_MASK (EBT_MARK_AND | EBT_MARK_OR)
struct ebt_mark_m_info {
unsigned long mark, mask;
- uint8_t invert;
- uint8_t bitmask;
+ __u8 invert;
+ __u8 bitmask;
};
#define EBT_MARK_MATCH "mark_m"
diff --git a/include/linux/netfilter_bridge/ebt_nflog.h b/include/linux/netfilter_bridge/ebt_nflog.h
index 0528178..df829fc 100644
--- a/include/linux/netfilter_bridge/ebt_nflog.h
+++ b/include/linux/netfilter_bridge/ebt_nflog.h
@@ -1,6 +1,8 @@
#ifndef __LINUX_BRIDGE_EBT_NFLOG_H
#define __LINUX_BRIDGE_EBT_NFLOG_H
+#include <linux/types.h>
+
#define EBT_NFLOG_MASK 0x0
#define EBT_NFLOG_PREFIX_SIZE 64
@@ -10,11 +12,11 @@
#define EBT_NFLOG_DEFAULT_THRESHOLD 1
struct ebt_nflog_info {
- u_int32_t len;
- u_int16_t group;
- u_int16_t threshold;
- u_int16_t flags;
- u_int16_t pad;
+ __u32 len;
+ __u16 group;
+ __u16 threshold;
+ __u16 flags;
+ __u16 pad;
char prefix[EBT_NFLOG_PREFIX_SIZE];
};
diff --git a/include/linux/netfilter_bridge/ebt_pkttype.h b/include/linux/netfilter_bridge/ebt_pkttype.h
index 51a7998..c241bad 100644
--- a/include/linux/netfilter_bridge/ebt_pkttype.h
+++ b/include/linux/netfilter_bridge/ebt_pkttype.h
@@ -1,9 +1,11 @@
#ifndef __LINUX_BRIDGE_EBT_PKTTYPE_H
#define __LINUX_BRIDGE_EBT_PKTTYPE_H
+#include <linux/types.h>
+
struct ebt_pkttype_info {
- uint8_t pkt_type;
- uint8_t invert;
+ __u8 pkt_type;
+ __u8 invert;
};
#define EBT_PKTTYPE_MATCH "pkttype"
diff --git a/include/linux/netfilter_bridge/ebt_stp.h b/include/linux/netfilter_bridge/ebt_stp.h
index e503a0a..1025b9f 100644
--- a/include/linux/netfilter_bridge/ebt_stp.h
+++ b/include/linux/netfilter_bridge/ebt_stp.h
@@ -1,6 +1,8 @@
#ifndef __LINUX_BRIDGE_EBT_STP_H
#define __LINUX_BRIDGE_EBT_STP_H
+#include <linux/types.h>
+
#define EBT_STP_TYPE 0x0001
#define EBT_STP_FLAGS 0x0002
@@ -21,24 +23,24 @@
#define EBT_STP_MATCH "stp"
struct ebt_stp_config_info {
- uint8_t flags;
- uint16_t root_priol, root_priou;
+ __u8 flags;
+ __u16 root_priol, root_priou;
char root_addr[6], root_addrmsk[6];
- uint32_t root_costl, root_costu;
- uint16_t sender_priol, sender_priou;
+ __u32 root_costl, root_costu;
+ __u16 sender_priol, sender_priou;
char sender_addr[6], sender_addrmsk[6];
- uint16_t portl, portu;
- uint16_t msg_agel, msg_ageu;
- uint16_t max_agel, max_ageu;
- uint16_t hello_timel, hello_timeu;
- uint16_t forward_delayl, forward_delayu;
+ __u16 portl, portu;
+ __u16 msg_agel, msg_ageu;
+ __u16 max_agel, max_ageu;
+ __u16 hello_timel, hello_timeu;
+ __u16 forward_delayl, forward_delayu;
};
struct ebt_stp_info {
- uint8_t type;
+ __u8 type;
struct ebt_stp_config_info config;
- uint16_t bitmask;
- uint16_t invflags;
+ __u16 bitmask;
+ __u16 invflags;
};
#endif
diff --git a/include/linux/netfilter_bridge/ebt_ulog.h b/include/linux/netfilter_bridge/ebt_ulog.h
index b677e26..89a6bec 100644
--- a/include/linux/netfilter_bridge/ebt_ulog.h
+++ b/include/linux/netfilter_bridge/ebt_ulog.h
@@ -1,6 +1,8 @@
#ifndef _EBT_ULOG_H
#define _EBT_ULOG_H
+#include <linux/types.h>
+
#define EBT_ULOG_DEFAULT_NLGROUP 0
#define EBT_ULOG_DEFAULT_QTHRESHOLD 1
#define EBT_ULOG_MAXNLGROUPS 32 /* hardcoded netlink max */
@@ -10,7 +12,7 @@
#define EBT_ULOG_VERSION 1
struct ebt_ulog_info {
- uint32_t nlgroup;
+ __u32 nlgroup;
unsigned int cprange;
unsigned int qthreshold;
char prefix[EBT_ULOG_PREFIX_LEN];
diff --git a/include/linux/netfilter_bridge/ebt_vlan.h b/include/linux/netfilter_bridge/ebt_vlan.h
index 1d98be4..967d1d5 100644
--- a/include/linux/netfilter_bridge/ebt_vlan.h
+++ b/include/linux/netfilter_bridge/ebt_vlan.h
@@ -1,6 +1,8 @@
#ifndef __LINUX_BRIDGE_EBT_VLAN_H
#define __LINUX_BRIDGE_EBT_VLAN_H
+#include <linux/types.h>
+
#define EBT_VLAN_ID 0x01
#define EBT_VLAN_PRIO 0x02
#define EBT_VLAN_ENCAP 0x04
@@ -8,12 +10,12 @@
#define EBT_VLAN_MATCH "vlan"
struct ebt_vlan_info {
- uint16_t id; /* VLAN ID {1-4095} */
- uint8_t prio; /* VLAN User Priority {0-7} */
+ __u16 id; /* VLAN ID {1-4095} */
+ __u8 prio; /* VLAN User Priority {0-7} */
__be16 encap; /* VLAN Encapsulated frame code {0-65535} */
- uint8_t bitmask; /* Args bitmask bit 1=1 - ID arg,
+ __u8 bitmask; /* Args bitmask bit 1=1 - ID arg,
bit 2=1 User-Priority arg, bit 3=1 encap*/
- uint8_t invflags; /* Inverse bitmask bit 1=1 - inversed ID arg,
+ __u8 invflags; /* Inverse bitmask bit 1=1 - inverted ID arg,
bit 2=1 - inverted Priority arg */
};
diff --git a/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h b/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h
index e5a3687..c6a204c 100644
--- a/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h
+++ b/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h
@@ -1,6 +1,8 @@
#ifndef _IPT_CLUSTERIP_H_target
#define _IPT_CLUSTERIP_H_target
+#include <linux/types.h>
+
enum clusterip_hashmode {
CLUSTERIP_HASHMODE_SIP = 0,
CLUSTERIP_HASHMODE_SIP_SPT,
@@ -17,15 +19,15 @@
struct ipt_clusterip_tgt_info {
- u_int32_t flags;
+ __u32 flags;
/* only relevant for new ones */
- u_int8_t clustermac[6];
- u_int16_t num_total_nodes;
- u_int16_t num_local_nodes;
- u_int16_t local_nodes[CLUSTERIP_MAX_NODES];
- u_int32_t hash_mode;
- u_int32_t hash_initval;
+ __u8 clustermac[6];
+ __u16 num_total_nodes;
+ __u16 num_local_nodes;
+ __u16 local_nodes[CLUSTERIP_MAX_NODES];
+ __u32 hash_mode;
+ __u32 hash_initval;
/* Used internally by the kernel */
struct clusterip_config *config;
diff --git a/include/linux/netfilter_ipv4/ipt_ECN.h b/include/linux/netfilter_ipv4/ipt_ECN.h
index 7ca4591..bb88d53 100644
--- a/include/linux/netfilter_ipv4/ipt_ECN.h
+++ b/include/linux/netfilter_ipv4/ipt_ECN.h
@@ -8,6 +8,8 @@
*/
#ifndef _IPT_ECN_TARGET_H
#define _IPT_ECN_TARGET_H
+
+#include <linux/types.h>
#include <linux/netfilter/xt_DSCP.h>
#define IPT_ECN_IP_MASK (~XT_DSCP_MASK)
@@ -19,11 +21,11 @@
#define IPT_ECN_OP_MASK 0xce
struct ipt_ECN_info {
- u_int8_t operation; /* bitset of operations */
- u_int8_t ip_ect; /* ECT codepoint of IPv4 header, pre-shifted */
+ __u8 operation; /* bitset of operations */
+ __u8 ip_ect; /* ECT codepoint of IPv4 header, pre-shifted */
union {
struct {
- u_int8_t ece:1, cwr:1; /* TCP ECT bits */
+ __u8 ece:1, cwr:1; /* TCP ECT bits */
} tcp;
} proto;
};
diff --git a/include/linux/netfilter_ipv4/ipt_SAME.h b/include/linux/netfilter_ipv4/ipt_SAME.h
index 2529660..5bca782 100644
--- a/include/linux/netfilter_ipv4/ipt_SAME.h
+++ b/include/linux/netfilter_ipv4/ipt_SAME.h
@@ -1,15 +1,17 @@
#ifndef _IPT_SAME_H
#define _IPT_SAME_H
+#include <linux/types.h>
+
#define IPT_SAME_MAX_RANGE 10
#define IPT_SAME_NODST 0x01
struct ipt_same_info {
unsigned char info;
- u_int32_t rangesize;
- u_int32_t ipnum;
- u_int32_t *iparray;
+ __u32 rangesize;
+ __u32 ipnum;
+ __u32 *iparray;
/* hangs off end. */
struct nf_nat_range range[IPT_SAME_MAX_RANGE];
diff --git a/include/linux/netfilter_ipv4/ipt_TTL.h b/include/linux/netfilter_ipv4/ipt_TTL.h
index ee6611e..f6ac169 100644
--- a/include/linux/netfilter_ipv4/ipt_TTL.h
+++ b/include/linux/netfilter_ipv4/ipt_TTL.h
@@ -4,6 +4,8 @@
#ifndef _IPT_TTL_H
#define _IPT_TTL_H
+#include <linux/types.h>
+
enum {
IPT_TTL_SET = 0,
IPT_TTL_INC,
@@ -13,8 +15,8 @@
#define IPT_TTL_MAXMODE IPT_TTL_DEC
struct ipt_TTL_info {
- u_int8_t mode;
- u_int8_t ttl;
+ __u8 mode;
+ __u8 ttl;
};
diff --git a/include/linux/netfilter_ipv4/ipt_addrtype.h b/include/linux/netfilter_ipv4/ipt_addrtype.h
index 446de6a..0da4223 100644
--- a/include/linux/netfilter_ipv4/ipt_addrtype.h
+++ b/include/linux/netfilter_ipv4/ipt_addrtype.h
@@ -1,6 +1,8 @@
#ifndef _IPT_ADDRTYPE_H
#define _IPT_ADDRTYPE_H
+#include <linux/types.h>
+
enum {
IPT_ADDRTYPE_INVERT_SOURCE = 0x0001,
IPT_ADDRTYPE_INVERT_DEST = 0x0002,
@@ -9,17 +11,17 @@
};
struct ipt_addrtype_info_v1 {
- u_int16_t source; /* source-type mask */
- u_int16_t dest; /* dest-type mask */
- u_int32_t flags;
+ __u16 source; /* source-type mask */
+ __u16 dest; /* dest-type mask */
+ __u32 flags;
};
/* revision 0 */
struct ipt_addrtype_info {
- u_int16_t source; /* source-type mask */
- u_int16_t dest; /* dest-type mask */
- u_int32_t invert_source;
- u_int32_t invert_dest;
+ __u16 source; /* source-type mask */
+ __u16 dest; /* dest-type mask */
+ __u32 invert_source;
+ __u32 invert_dest;
};
#endif
diff --git a/include/linux/netfilter_ipv4/ipt_ah.h b/include/linux/netfilter_ipv4/ipt_ah.h
index 2e555b4..4e02bb0 100644
--- a/include/linux/netfilter_ipv4/ipt_ah.h
+++ b/include/linux/netfilter_ipv4/ipt_ah.h
@@ -1,9 +1,11 @@
#ifndef _IPT_AH_H
#define _IPT_AH_H
+#include <linux/types.h>
+
struct ipt_ah {
- u_int32_t spis[2]; /* Security Parameter Index */
- u_int8_t invflags; /* Inverse flags */
+ __u32 spis[2]; /* Security Parameter Index */
+ __u8 invflags; /* Inverse flags */
};
diff --git a/include/linux/netfilter_ipv4/ipt_ecn.h b/include/linux/netfilter_ipv4/ipt_ecn.h
index 9945baa..eabf95f 100644
--- a/include/linux/netfilter_ipv4/ipt_ecn.h
+++ b/include/linux/netfilter_ipv4/ipt_ecn.h
@@ -8,6 +8,8 @@
*/
#ifndef _IPT_ECN_H
#define _IPT_ECN_H
+
+#include <linux/types.h>
#include <linux/netfilter/xt_dscp.h>
#define IPT_ECN_IP_MASK (~XT_DSCP_MASK)
@@ -20,12 +22,12 @@
/* match info */
struct ipt_ecn_info {
- u_int8_t operation;
- u_int8_t invert;
- u_int8_t ip_ect;
+ __u8 operation;
+ __u8 invert;
+ __u8 ip_ect;
union {
struct {
- u_int8_t ect;
+ __u8 ect;
} tcp;
} proto;
};
diff --git a/include/linux/netfilter_ipv4/ipt_ttl.h b/include/linux/netfilter_ipv4/ipt_ttl.h
index ee24fd8..37bee44 100644
--- a/include/linux/netfilter_ipv4/ipt_ttl.h
+++ b/include/linux/netfilter_ipv4/ipt_ttl.h
@@ -4,6 +4,8 @@
#ifndef _IPT_TTL_H
#define _IPT_TTL_H
+#include <linux/types.h>
+
enum {
IPT_TTL_EQ = 0, /* equals */
IPT_TTL_NE, /* not equals */
@@ -13,8 +15,8 @@
struct ipt_ttl_info {
- u_int8_t mode;
- u_int8_t ttl;
+ __u8 mode;
+ __u8 ttl;
};
diff --git a/include/linux/netfilter_ipv6/ip6t_HL.h b/include/linux/netfilter_ipv6/ip6t_HL.h
index afb7813..ebd8ead 100644
--- a/include/linux/netfilter_ipv6/ip6t_HL.h
+++ b/include/linux/netfilter_ipv6/ip6t_HL.h
@@ -5,6 +5,8 @@
#ifndef _IP6T_HL_H
#define _IP6T_HL_H
+#include <linux/types.h>
+
enum {
IP6T_HL_SET = 0,
IP6T_HL_INC,
@@ -14,8 +16,8 @@
#define IP6T_HL_MAXMODE IP6T_HL_DEC
struct ip6t_HL_info {
- u_int8_t mode;
- u_int8_t hop_limit;
+ __u8 mode;
+ __u8 hop_limit;
};
diff --git a/include/linux/netfilter_ipv6/ip6t_REJECT.h b/include/linux/netfilter_ipv6/ip6t_REJECT.h
index 6be6504..205ed62 100644
--- a/include/linux/netfilter_ipv6/ip6t_REJECT.h
+++ b/include/linux/netfilter_ipv6/ip6t_REJECT.h
@@ -1,6 +1,8 @@
#ifndef _IP6T_REJECT_H
#define _IP6T_REJECT_H
+#include <linux/types.h>
+
enum ip6t_reject_with {
IP6T_ICMP6_NO_ROUTE,
IP6T_ICMP6_ADM_PROHIBITED,
@@ -12,7 +14,7 @@
};
struct ip6t_reject_info {
- u_int32_t with; /* reject type */
+ __u32 with; /* reject type */
};
#endif /*_IP6T_REJECT_H*/
diff --git a/include/linux/netfilter_ipv6/ip6t_ah.h b/include/linux/netfilter_ipv6/ip6t_ah.h
index 17a745c..5da2b65 100644
--- a/include/linux/netfilter_ipv6/ip6t_ah.h
+++ b/include/linux/netfilter_ipv6/ip6t_ah.h
@@ -1,11 +1,13 @@
#ifndef _IP6T_AH_H
#define _IP6T_AH_H
+#include <linux/types.h>
+
struct ip6t_ah {
- u_int32_t spis[2]; /* Security Parameter Index */
- u_int32_t hdrlen; /* Header Length */
- u_int8_t hdrres; /* Test of the Reserved Filed */
- u_int8_t invflags; /* Inverse flags */
+ __u32 spis[2]; /* Security Parameter Index */
+ __u32 hdrlen; /* Header Length */
+ __u8 hdrres; /* Test of the Reserved Field */
+ __u8 invflags; /* Inverse flags */
};
#define IP6T_AH_SPI 0x01
diff --git a/include/linux/netfilter_ipv6/ip6t_frag.h b/include/linux/netfilter_ipv6/ip6t_frag.h
index 3724d08..b47f61b 100644
--- a/include/linux/netfilter_ipv6/ip6t_frag.h
+++ b/include/linux/netfilter_ipv6/ip6t_frag.h
@@ -1,11 +1,13 @@
#ifndef _IP6T_FRAG_H
#define _IP6T_FRAG_H
+#include <linux/types.h>
+
struct ip6t_frag {
- u_int32_t ids[2]; /* Security Parameter Index */
- u_int32_t hdrlen; /* Header Length */
- u_int8_t flags; /* */
- u_int8_t invflags; /* Inverse flags */
+ __u32 ids[2]; /* Security Parameter Index */
+ __u32 hdrlen; /* Header Length */
+ __u8 flags; /* */
+ __u8 invflags; /* Inverse flags */
};
#define IP6T_FRAG_IDS 0x01
diff --git a/include/linux/netfilter_ipv6/ip6t_hl.h b/include/linux/netfilter_ipv6/ip6t_hl.h
index 5ef91b8..6e76dbc 100644
--- a/include/linux/netfilter_ipv6/ip6t_hl.h
+++ b/include/linux/netfilter_ipv6/ip6t_hl.h
@@ -5,6 +5,8 @@
#ifndef _IP6T_HL_H
#define _IP6T_HL_H
+#include <linux/types.h>
+
enum {
IP6T_HL_EQ = 0, /* equals */
IP6T_HL_NE, /* not equals */
@@ -14,8 +16,8 @@
struct ip6t_hl_info {
- u_int8_t mode;
- u_int8_t hop_limit;
+ __u8 mode;
+ __u8 hop_limit;
};
diff --git a/include/linux/netfilter_ipv6/ip6t_ipv6header.h b/include/linux/netfilter_ipv6/ip6t_ipv6header.h
index 01dfd44..efae3a2 100644
--- a/include/linux/netfilter_ipv6/ip6t_ipv6header.h
+++ b/include/linux/netfilter_ipv6/ip6t_ipv6header.h
@@ -8,10 +8,12 @@
#ifndef __IPV6HEADER_H
#define __IPV6HEADER_H
+#include <linux/types.h>
+
struct ip6t_ipv6header_info {
- u_int8_t matchflags;
- u_int8_t invflags;
- u_int8_t modeflag;
+ __u8 matchflags;
+ __u8 invflags;
+ __u8 modeflag;
};
#define MASK_HOPOPTS 128
diff --git a/include/linux/netfilter_ipv6/ip6t_mh.h b/include/linux/netfilter_ipv6/ip6t_mh.h
index 18549bc..a7729a5 100644
--- a/include/linux/netfilter_ipv6/ip6t_mh.h
+++ b/include/linux/netfilter_ipv6/ip6t_mh.h
@@ -1,10 +1,12 @@
#ifndef _IP6T_MH_H
#define _IP6T_MH_H
+#include <linux/types.h>
+
/* MH matching stuff */
struct ip6t_mh {
- u_int8_t types[2]; /* MH type range */
- u_int8_t invflags; /* Inverse flags */
+ __u8 types[2]; /* MH type range */
+ __u8 invflags; /* Inverse flags */
};
/* Values for "invflags" field in struct ip6t_mh. */
diff --git a/include/linux/netfilter_ipv6/ip6t_opts.h b/include/linux/netfilter_ipv6/ip6t_opts.h
index 62d89bc..17d419a 100644
--- a/include/linux/netfilter_ipv6/ip6t_opts.h
+++ b/include/linux/netfilter_ipv6/ip6t_opts.h
@@ -1,14 +1,16 @@
#ifndef _IP6T_OPTS_H
#define _IP6T_OPTS_H
+#include <linux/types.h>
+
#define IP6T_OPTS_OPTSNR 16
struct ip6t_opts {
- u_int32_t hdrlen; /* Header Length */
- u_int8_t flags; /* */
- u_int8_t invflags; /* Inverse flags */
- u_int16_t opts[IP6T_OPTS_OPTSNR]; /* opts */
- u_int8_t optsnr; /* Nr of OPts */
+ __u32 hdrlen; /* Header Length */
+ __u8 flags; /* */
+ __u8 invflags; /* Inverse flags */
+ __u16 opts[IP6T_OPTS_OPTSNR]; /* opts */
+ __u8 optsnr; /* Nr of OPts */
};
#define IP6T_OPTS_LEN 0x01
diff --git a/include/linux/netfilter_ipv6/ip6t_rt.h b/include/linux/netfilter_ipv6/ip6t_rt.h
index ab91bfd..7605a5f 100644
--- a/include/linux/netfilter_ipv6/ip6t_rt.h
+++ b/include/linux/netfilter_ipv6/ip6t_rt.h
@@ -1,18 +1,19 @@
#ifndef _IP6T_RT_H
#define _IP6T_RT_H
+#include <linux/types.h>
/*#include <linux/in6.h>*/
#define IP6T_RT_HOPS 16
struct ip6t_rt {
- u_int32_t rt_type; /* Routing Type */
- u_int32_t segsleft[2]; /* Segments Left */
- u_int32_t hdrlen; /* Header Length */
- u_int8_t flags; /* */
- u_int8_t invflags; /* Inverse flags */
+ __u32 rt_type; /* Routing Type */
+ __u32 segsleft[2]; /* Segments Left */
+ __u32 hdrlen; /* Header Length */
+ __u8 flags; /* */
+ __u8 invflags; /* Inverse flags */
struct in6_addr addrs[IP6T_RT_HOPS]; /* Hops */
- u_int8_t addrnr; /* Nr of Addresses */
+ __u8 addrnr; /* Nr of Addresses */
};
#define IP6T_RT_TYP 0x01
diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index 2cfa4bc..776cd93 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -481,4 +481,16 @@
__u32 deficit;
};
+/* MQPRIO */
+#define TC_QOPT_BITMASK 15
+#define TC_QOPT_MAX_QUEUE 16
+
+struct tc_mqprio_qopt {
+ __u8 num_tc;
+ __u8 prio_tc_map[TC_QOPT_BITMASK + 1];
+ __u8 hw;
+ __u16 count[TC_QOPT_MAX_QUEUE];
+ __u16 offset[TC_QOPT_MAX_QUEUE];
+};
+
#endif
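
The tc_mqprio_qopt added above is the whole netlink/userspace ABI for the new mqprio scheduler: prio_tc_map assigns each of the 16 skb priorities to a traffic class, and count/offset carve a contiguous hardware queue range per class. A minimal sketch of filling it, assuming a hypothetical two-class NIC with eight TX queues (not code from this patch):

	struct tc_mqprio_qopt qopt = {
		.num_tc = 2,
		.hw     = 1,	/* ask the driver to offload the mapping */
	};
	int prio;

	/* priorities 0-7 -> TC 0, priorities 8-15 -> TC 1 */
	for (prio = 0; prio <= TC_QOPT_BITMASK; prio++)
		qopt.prio_tc_map[prio] = prio < 8 ? 0 : 1;

	qopt.count[0]  = 4;	/* TC 0 owns hardware queues 0-3 */
	qopt.offset[0] = 0;
	qopt.count[1]  = 4;	/* TC 1 owns hardware queues 4-7 */
	qopt.offset[1] = 4;
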
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index bf221d6..31f02d0 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1801,6 +1801,15 @@
prefetch(skb->prev), (skb != (struct sk_buff *)(queue)); \
skb = skb->prev)
+#define skb_queue_reverse_walk_safe(queue, skb, tmp) \
+ for (skb = (queue)->prev, tmp = skb->prev; \
+ skb != (struct sk_buff *)(queue); \
+ skb = tmp, tmp = skb->prev)
+
+#define skb_queue_reverse_walk_from_safe(queue, skb, tmp) \
+ for (tmp = skb->prev; \
+ skb != (struct sk_buff *)(queue); \
+ skb = tmp, tmp = skb->prev)
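
The two _safe variants cache the previous pointer in tmp, so the current skb can be unlinked and freed mid-walk without derailing the iteration. A minimal sketch, assuming the caller already holds the queue lock; some_drop_predicate() is a hypothetical stand-in:

	struct sk_buff *skb, *tmp;

	skb_queue_reverse_walk_safe(queue, skb, tmp) {
		if (some_drop_predicate(skb)) {		/* hypothetical */
			__skb_unlink(skb, queue);	/* ok: tmp is saved */
			kfree_skb(skb);
		}
	}
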
static inline bool skb_has_frag_list(const struct sk_buff *skb)
{
@@ -1868,7 +1877,7 @@
extern int skb_shift(struct sk_buff *tgt, struct sk_buff *skb,
int shiftlen);
-extern struct sk_buff *skb_segment(struct sk_buff *skb, int features);
+extern struct sk_buff *skb_segment(struct sk_buff *skb, u32 features);
static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
int len, void *buffer)
diff --git a/include/net/dst.h b/include/net/dst.h
index 93b0310..be5a0d4 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -72,7 +72,7 @@
u32 _metrics[RTAX_MAX];
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
__u32 tclassid;
#else
__u32 __pad2;
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 07bdb5e..65d1fcd 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -55,7 +55,7 @@
int nh_weight;
int nh_power;
#endif
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
__u32 nh_tclassid;
#endif
int nh_oif;
@@ -201,7 +201,7 @@
extern int __net_init fib4_rules_init(struct net *net);
extern void __net_exit fib4_rules_exit(struct net *net);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
extern u32 fib_rules_tclass(struct fib_result *res);
#endif
@@ -235,7 +235,7 @@
static inline void fib_combine_itag(u32 *itag, struct fib_result *res)
{
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
#ifdef CONFIG_IP_MULTIPLE_TABLES
u32 rtag;
#endif
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index b7bbd6c..b23bea6 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -28,6 +28,80 @@
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#include <net/netfilter/nf_conntrack.h>
#endif
+#include <net/net_namespace.h> /* Network namespace */
+
+/*
+ * Generic access of ipvs struct
+ */
+static inline struct netns_ipvs *net_ipvs(struct net* net)
+{
+ return net->ipvs;
+}
+/*
+ * Get the net ptr from the skb in traffic cases;
+ * use skb_sknet when the call comes from userland (ioctl or netlink)
+ */
+static inline struct net *skb_net(const struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_NS
+#ifdef CONFIG_IP_VS_DEBUG
+ /*
+ * This is used for debug only.
+ * Start with the most likely hit
+ * End with BUG
+ */
+ if (likely(skb->dev && skb->dev->nd_net))
+ return dev_net(skb->dev);
+ if (skb_dst(skb)->dev)
+ return dev_net(skb_dst(skb)->dev);
+ WARN(skb->sk, "Maybe skb_sknet should be used in %s() at line:%d\n",
+ __func__, __LINE__);
+ if (likely(skb->sk && skb->sk->sk_net))
+ return sock_net(skb->sk);
+ pr_err("There is no net ptr to find in the skb in %s() line:%d\n",
+ __func__, __LINE__);
+ BUG();
+#else
+ return dev_net(skb->dev ? : skb_dst(skb)->dev);
+#endif
+#else
+ return &init_net;
+#endif
+}
+
+static inline struct net *skb_sknet(const struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_NS
+#ifdef CONFIG_IP_VS_DEBUG
+ /* Start with the most likely hit */
+ if (likely(skb->sk && skb->sk->sk_net))
+ return sock_net(skb->sk);
+ WARN(skb->dev, "Maybe skb_net should be used instead in %s() line:%d\n",
+ __func__, __LINE__);
+ if (likely(skb->dev && skb->dev->nd_net))
+ return dev_net(skb->dev);
+ pr_err("There is no net ptr to find in the skb in %s() line:%d\n",
+ __func__, __LINE__);
+ BUG();
+#else
+ return sock_net(skb->sk);
+#endif
+#else
+ return &init_net;
+#endif
+}
+/*
+ * This one is needed for single_open_net since the net is stored directly
+ * in seq->private, not wrapped in a struct, i.e. seq_file_net can't be used.
+ */
+static inline struct net *seq_file_single_net(struct seq_file *seq)
+{
+#ifdef CONFIG_NET_NS
+ return (struct net *)seq->private;
+#else
+ return &init_net;
+#endif
+}
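
seq_file_single_net() exists because single_open_net() stores the net pointer directly in seq->private rather than wrapping it in a struct seq_net_private, so seq_file_net() would misinterpret it. A hedged sketch of a per-netns show routine built on it; the ip_vs_foo_* names are hypothetical:

	static int ip_vs_foo_show(struct seq_file *seq, void *v)
	{
		struct net *net = seq_file_single_net(seq);
		struct netns_ipvs *ipvs = net_ipvs(net);

		seq_printf(seq, "conns %d\n", atomic_read(&ipvs->conn_count));
		return 0;
	}

	static int ip_vs_foo_open(struct inode *inode, struct file *file)
	{
		return single_open_net(inode, file, ip_vs_foo_show);
	}
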
/* Connections' size value needed by ip_vs_ctl.c */
extern int ip_vs_conn_tab_size;
@@ -258,6 +332,23 @@
before last resized pkt */
};
+/*
+ * counters per cpu
+ */
+struct ip_vs_counters {
+ __u32 conns; /* connections scheduled */
+ __u32 inpkts; /* incoming packets */
+ __u32 outpkts; /* outgoing packets */
+ __u64 inbytes; /* incoming bytes */
+ __u64 outbytes; /* outgoing bytes */
+};
+/*
+ * Stats per cpu
+ */
+struct ip_vs_cpu_stats {
+ struct ip_vs_counters ustats;
+ struct u64_stats_sync syncp;
+};
/*
* IPVS statistics objects
@@ -279,17 +370,34 @@
};
struct ip_vs_stats {
- struct ip_vs_stats_user ustats; /* statistics */
+ struct ip_vs_stats_user ustats; /* statistics */
struct ip_vs_estimator est; /* estimator */
-
- spinlock_t lock; /* spin lock */
+ struct ip_vs_cpu_stats *cpustats; /* per cpu counters */
+ spinlock_t lock; /* spin lock */
};
+/*
+ * Helper macros for the per-cpu counters,
+ * e.g. ipvs->tot_stats->ustats.count
+ */
+#define IPVS_STAT_INC(ipvs, count) \
+ __this_cpu_inc((ipvs)->ustats->count)
+
+#define IPVS_STAT_ADD(ipvs, count, value) \
+ do {\
+ write_seqcount_begin(per_cpu_ptr((ipvs)->ustats_seq, \
+ raw_smp_processor_id())); \
+ __this_cpu_add((ipvs)->ustats->count, value); \
+ write_seqcount_end(per_cpu_ptr((ipvs)->ustats_seq, \
+ raw_smp_processor_id())); \
+ } while (0)
+
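
Each ip_vs_cpu_stats pairs its counters with a u64_stats_sync so that 32-bit readers can fetch the 64-bit byte counters without tearing. An illustrative reader-side sum, assuming the per-netns cpustats pointer declared later in netns_ipvs; this loop is not the patch's own code:

	static u64 ipvs_sum_inbytes(struct netns_ipvs *ipvs)
	{
		u64 sum = 0;
		int cpu;

		for_each_possible_cpu(cpu) {
			struct ip_vs_cpu_stats *s =
				per_cpu_ptr(ipvs->cpustats, cpu);
			unsigned int start;
			u64 v;

			do {	/* retry if a writer raced with us */
				start = u64_stats_fetch_begin(&s->syncp);
				v = s->ustats.inbytes;
			} while (u64_stats_fetch_retry(&s->syncp, start));
			sum += v;
		}
		return sum;
	}
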
struct dst_entry;
struct iphdr;
struct ip_vs_conn;
struct ip_vs_app;
struct sk_buff;
+struct ip_vs_proto_data;
struct ip_vs_protocol {
struct ip_vs_protocol *next;
@@ -297,21 +405,22 @@
u16 protocol;
u16 num_states;
int dont_defrag;
- atomic_t appcnt; /* counter of proto app incs */
- int *timeout_table; /* protocol timeout table */
void (*init)(struct ip_vs_protocol *pp);
void (*exit)(struct ip_vs_protocol *pp);
+ void (*init_netns)(struct net *net, struct ip_vs_proto_data *pd);
+
+ void (*exit_netns)(struct net *net, struct ip_vs_proto_data *pd);
+
int (*conn_schedule)(int af, struct sk_buff *skb,
- struct ip_vs_protocol *pp,
+ struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp);
struct ip_vs_conn *
(*conn_in_get)(int af,
const struct sk_buff *skb,
- struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph,
unsigned int proto_off,
int inverse);
@@ -319,7 +428,6 @@
struct ip_vs_conn *
(*conn_out_get)(int af,
const struct sk_buff *skb,
- struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph,
unsigned int proto_off,
int inverse);
@@ -337,11 +445,11 @@
int (*state_transition)(struct ip_vs_conn *cp, int direction,
const struct sk_buff *skb,
- struct ip_vs_protocol *pp);
+ struct ip_vs_proto_data *pd);
- int (*register_app)(struct ip_vs_app *inc);
+ int (*register_app)(struct net *net, struct ip_vs_app *inc);
- void (*unregister_app)(struct ip_vs_app *inc);
+ void (*unregister_app)(struct net *net, struct ip_vs_app *inc);
int (*app_conn_bind)(struct ip_vs_conn *cp);
@@ -350,14 +458,26 @@
int offset,
const char *msg);
- void (*timeout_change)(struct ip_vs_protocol *pp, int flags);
-
- int (*set_state_timeout)(struct ip_vs_protocol *pp, char *sname, int to);
+ void (*timeout_change)(struct ip_vs_proto_data *pd, int flags);
};
-extern struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto);
+/*
+ * protocol data per netns
+ */
+struct ip_vs_proto_data {
+ struct ip_vs_proto_data *next;
+ struct ip_vs_protocol *pp;
+ int *timeout_table; /* protocol timeout table */
+ atomic_t appcnt; /* counter of proto app incs. */
+ struct tcp_states_t *tcp_state_table;
+};
+
+extern struct ip_vs_protocol *ip_vs_proto_get(unsigned short proto);
+extern struct ip_vs_proto_data *ip_vs_proto_data_get(struct net *net,
+ unsigned short proto);
struct ip_vs_conn_param {
+ struct net *net;
const union nf_inet_addr *caddr;
const union nf_inet_addr *vaddr;
__be16 cport;
@@ -375,16 +495,19 @@
*/
struct ip_vs_conn {
struct list_head c_list; /* hashed list heads */
-
+#ifdef CONFIG_NET_NS
+ struct net *net; /* Name space */
+#endif
/* Protocol, addresses and port numbers */
- u16 af; /* address family */
- union nf_inet_addr caddr; /* client address */
- union nf_inet_addr vaddr; /* virtual address */
- union nf_inet_addr daddr; /* destination address */
- volatile __u32 flags; /* status flags */
- __be16 cport;
- __be16 vport;
- __be16 dport;
+ u16 af; /* address family */
+ __be16 cport;
+ __be16 vport;
+ __be16 dport;
+ __u32 fwmark; /* Firewall mark from skb */
+ union nf_inet_addr caddr; /* client address */
+ union nf_inet_addr vaddr; /* virtual address */
+ union nf_inet_addr daddr; /* destination address */
+ volatile __u32 flags; /* status flags */
__u16 protocol; /* Which protocol (TCP/UDP) */
/* counter and timer */
@@ -422,10 +545,38 @@
struct ip_vs_seq in_seq; /* incoming seq. struct */
struct ip_vs_seq out_seq; /* outgoing seq. struct */
+ const struct ip_vs_pe *pe;
char *pe_data;
__u8 pe_data_len;
};
+/*
+ * Helpers to save some memory in the conn table when namespaces are disabled.
+ */
+static inline struct net *ip_vs_conn_net(const struct ip_vs_conn *cp)
+{
+#ifdef CONFIG_NET_NS
+ return cp->net;
+#else
+ return &init_net;
+#endif
+}
+static inline void ip_vs_conn_net_set(struct ip_vs_conn *cp, struct net *net)
+{
+#ifdef CONFIG_NET_NS
+ cp->net = net;
+#endif
+}
+
+static inline int ip_vs_conn_net_eq(const struct ip_vs_conn *cp,
+ struct net *net)
+{
+#ifdef CONFIG_NET_NS
+ return cp->net == net;
+#else
+ return 1;
+#endif
+}
/*
* Extended internal versions of struct ip_vs_service_user and
@@ -485,6 +636,7 @@
unsigned flags; /* service status flags */
unsigned timeout; /* persistent timeout in ticks */
__be32 netmask; /* grouping granularity */
+ struct net *net;
struct list_head destinations; /* real server d-linked list */
__u32 num_dests; /* number of servers */
@@ -510,8 +662,8 @@
struct list_head d_list; /* for table with all the dests */
u16 af; /* address family */
- union nf_inet_addr addr; /* IP address of the server */
__be16 port; /* port number of the server */
+ union nf_inet_addr addr; /* IP address of the server */
volatile unsigned flags; /* dest status flags */
atomic_t conn_flags; /* flags to copy to conn */
atomic_t weight; /* server weight */
@@ -538,8 +690,8 @@
/* for virtual service */
struct ip_vs_service *svc; /* service it belongs to */
__u16 protocol; /* which protocol (TCP/UDP) */
- union nf_inet_addr vaddr; /* virtual IP address */
__be16 vport; /* virtual port number */
+ union nf_inet_addr vaddr; /* virtual IP address */
__u32 vfwmark; /* firewall mark of service */
};
@@ -674,13 +826,14 @@
IP_VS_DIR_LAST,
};
-static inline void ip_vs_conn_fill_param(int af, int protocol,
+static inline void ip_vs_conn_fill_param(struct net *net, int af, int protocol,
const union nf_inet_addr *caddr,
__be16 cport,
const union nf_inet_addr *vaddr,
__be16 vport,
struct ip_vs_conn_param *p)
{
+ p->net = net;
p->af = af;
p->protocol = protocol;
p->caddr = caddr;
@@ -695,7 +848,6 @@
struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p);
struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
- struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph,
unsigned int proto_off,
int inverse);
@@ -703,7 +855,6 @@
struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p);
struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
- struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph,
unsigned int proto_off,
int inverse);
@@ -719,14 +870,14 @@
struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p,
const union nf_inet_addr *daddr,
__be16 dport, unsigned flags,
- struct ip_vs_dest *dest);
+ struct ip_vs_dest *dest, __u32 fwmark);
extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp);
extern const char * ip_vs_state_name(__u16 proto, int state);
-extern void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp);
+extern void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp);
extern int ip_vs_check_template(struct ip_vs_conn *ct);
-extern void ip_vs_random_dropentry(void);
+extern void ip_vs_random_dropentry(struct net *net);
extern int ip_vs_conn_init(void);
extern void ip_vs_conn_cleanup(void);
@@ -796,12 +947,12 @@
* (from ip_vs_app.c)
*/
#define IP_VS_APP_MAX_PORTS 8
-extern int register_ip_vs_app(struct ip_vs_app *app);
-extern void unregister_ip_vs_app(struct ip_vs_app *app);
+extern int register_ip_vs_app(struct net *net, struct ip_vs_app *app);
+extern void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app);
extern int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
extern void ip_vs_unbind_app(struct ip_vs_conn *cp);
-extern int
-register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port);
+extern int register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app,
+ __u16 proto, __u16 port);
extern int ip_vs_app_inc_get(struct ip_vs_app *inc);
extern void ip_vs_app_inc_put(struct ip_vs_app *inc);
@@ -814,15 +965,27 @@
void ip_vs_unbind_pe(struct ip_vs_service *svc);
int register_ip_vs_pe(struct ip_vs_pe *pe);
int unregister_ip_vs_pe(struct ip_vs_pe *pe);
-extern struct ip_vs_pe *ip_vs_pe_get(const char *name);
-extern void ip_vs_pe_put(struct ip_vs_pe *pe);
+struct ip_vs_pe *ip_vs_pe_getbyname(const char *name);
+struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name);
+
+static inline void ip_vs_pe_get(const struct ip_vs_pe *pe)
+{
+ if (pe && pe->module)
+ __module_get(pe->module);
+}
+
+static inline void ip_vs_pe_put(const struct ip_vs_pe *pe)
+{
+ if (pe && pe->module)
+ module_put(pe->module);
+}
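
ip_vs_pe_get/put make PE lifetime an explicit module-reference pair instead of hiding the refcount inside the old lookup helpers. A hedged sketch of the pinning pattern; the surrounding connection code is illustrative:

	/* when binding a connection to a persistence engine: */
	ip_vs_pe_get(pe);		/* __module_get() if pe is modular */
	cp->pe = pe;
	/* ... while the conn lives, pe->module cannot unload ... */
	ip_vs_pe_put(cp->pe);		/* module_put() on conn destruction */
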
/*
* IPVS protocol functions (from ip_vs_proto.c)
*/
extern int ip_vs_protocol_init(void);
extern void ip_vs_protocol_cleanup(void);
-extern void ip_vs_protocol_timeout_change(int flags);
+extern void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags);
extern int *ip_vs_create_timeout_table(int *table, int size);
extern int
ip_vs_set_state_timeout(int *table, int num, const char *const *names,
@@ -852,26 +1015,21 @@
extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler);
extern struct ip_vs_conn *
ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
- struct ip_vs_protocol *pp, int *ignored);
+ struct ip_vs_proto_data *pd, int *ignored);
extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
- struct ip_vs_protocol *pp);
+ struct ip_vs_proto_data *pd);
/*
* IPVS control data and functions (from ip_vs_ctl.c)
*/
-extern int sysctl_ip_vs_cache_bypass;
-extern int sysctl_ip_vs_expire_nodest_conn;
-extern int sysctl_ip_vs_expire_quiescent_template;
-extern int sysctl_ip_vs_sync_threshold[2];
-extern int sysctl_ip_vs_nat_icmp_send;
-extern int sysctl_ip_vs_conntrack;
-extern int sysctl_ip_vs_snat_reroute;
extern struct ip_vs_stats ip_vs_stats;
extern const struct ctl_path net_vs_ctl_path[];
+extern int sysctl_ip_vs_sync_ver;
+extern void ip_vs_sync_switch_mode(struct net *net, int mode);
extern struct ip_vs_service *
-ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
+ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
const union nf_inet_addr *vaddr, __be16 vport);
static inline void ip_vs_service_put(struct ip_vs_service *svc)
@@ -880,7 +1038,7 @@
}
extern struct ip_vs_dest *
-ip_vs_lookup_real_service(int af, __u16 protocol,
+ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
const union nf_inet_addr *daddr, __be16 dport);
extern int ip_vs_use_count_inc(void);
@@ -888,8 +1046,9 @@
extern int ip_vs_control_init(void);
extern void ip_vs_control_cleanup(void);
extern struct ip_vs_dest *
-ip_vs_find_dest(int af, const union nf_inet_addr *daddr, __be16 dport,
- const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol);
+ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr,
+ __be16 dport, const union nf_inet_addr *vaddr, __be16 vport,
+ __u16 protocol, __u32 fwmark);
extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
@@ -897,14 +1056,12 @@
* IPVS sync daemon data and function prototypes
* (from ip_vs_sync.c)
*/
-extern volatile int ip_vs_sync_state;
-extern volatile int ip_vs_master_syncid;
-extern volatile int ip_vs_backup_syncid;
-extern char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-extern char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-extern int start_sync_thread(int state, char *mcast_ifn, __u8 syncid);
-extern int stop_sync_thread(int state);
-extern void ip_vs_sync_conn(struct ip_vs_conn *cp);
+extern int start_sync_thread(struct net *net, int state, char *mcast_ifn,
+ __u8 syncid);
+extern int stop_sync_thread(struct net *net, int state);
+extern void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp);
+extern int ip_vs_sync_init(void);
+extern void ip_vs_sync_cleanup(void);
/*
@@ -912,8 +1069,8 @@
*/
extern int ip_vs_estimator_init(void);
extern void ip_vs_estimator_cleanup(void);
-extern void ip_vs_new_estimator(struct ip_vs_stats *stats);
-extern void ip_vs_kill_estimator(struct ip_vs_stats *stats);
+extern void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats);
+extern void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats);
extern void ip_vs_zero_estimator(struct ip_vs_stats *stats);
/*
@@ -955,11 +1112,13 @@
extern int ip_vs_drop_rate;
extern int ip_vs_drop_counter;
-static __inline__ int ip_vs_todrop(void)
+static inline int ip_vs_todrop(struct netns_ipvs *ipvs)
{
- if (!ip_vs_drop_rate) return 0;
- if (--ip_vs_drop_counter > 0) return 0;
- ip_vs_drop_counter = ip_vs_drop_rate;
+ if (!ipvs->drop_rate)
+ return 0;
+ if (--ipvs->drop_counter > 0)
+ return 0;
+ ipvs->drop_counter = ipvs->drop_rate;
return 1;
}
@@ -1047,9 +1206,9 @@
* Netfilter connection tracking
* (from ip_vs_nfct.c)
*/
-static inline int ip_vs_conntrack_enabled(void)
+static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
{
- return sysctl_ip_vs_conntrack;
+ return ipvs->sysctl_conntrack;
}
extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp,
@@ -1062,7 +1221,7 @@
#else
-static inline int ip_vs_conntrack_enabled(void)
+static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
{
return 0;
}
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 1bf812b..b3b4a34 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -20,6 +20,7 @@
#include <net/netns/conntrack.h>
#endif
#include <net/netns/xfrm.h>
+#include <net/netns/ip_vs.h>
struct proc_dir_entry;
struct net_device;
@@ -94,6 +95,7 @@
#ifdef CONFIG_XFRM
struct netns_xfrm xfrm;
#endif
+ struct netns_ipvs *ipvs;
};
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index d85cff1..d0d1337 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -50,11 +50,24 @@
/* per conntrack: application helper private data */
union nf_conntrack_help {
/* insert conntrack helper private data (master) here */
+#if defined(CONFIG_NF_CONNTRACK_FTP) || defined(CONFIG_NF_CONNTRACK_FTP_MODULE)
struct nf_ct_ftp_master ct_ftp_info;
+#endif
+#if defined(CONFIG_NF_CONNTRACK_PPTP) || \
+ defined(CONFIG_NF_CONNTRACK_PPTP_MODULE)
struct nf_ct_pptp_master ct_pptp_info;
+#endif
+#if defined(CONFIG_NF_CONNTRACK_H323) || \
+ defined(CONFIG_NF_CONNTRACK_H323_MODULE)
struct nf_ct_h323_master ct_h323_info;
+#endif
+#if defined(CONFIG_NF_CONNTRACK_SANE) || \
+ defined(CONFIG_NF_CONNTRACK_SANE_MODULE)
struct nf_ct_sane_master ct_sane_info;
+#endif
+#if defined(CONFIG_NF_CONNTRACK_SIP) || defined(CONFIG_NF_CONNTRACK_SIP_MODULE)
struct nf_ct_sip_master ct_sip_info;
+#endif
};
#include <linux/types.h>
@@ -116,14 +129,14 @@
u_int32_t secmark;
#endif
- /* Storage reserved for other modules: */
- union nf_conntrack_proto proto;
-
/* Extensions */
struct nf_ct_ext *ext;
#ifdef CONFIG_NET_NS
struct net *ct_net;
#endif
+
+ /* Storage reserved for other modules, must be the last member */
+ union nf_conntrack_proto proto;
};
static inline struct nf_conn *
@@ -189,9 +202,9 @@
* Allocate a hashtable of hlist_head (if nulls == 0),
* or hlist_nulls_head (if nulls == 1)
*/
-extern void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls);
+extern void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls);
-extern void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size);
+extern void nf_ct_free_hashtable(void *hash, unsigned int size);
extern struct nf_conntrack_tuple_hash *
__nf_conntrack_find(struct net *net, u16 zone,
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index 96ba5f7..8fdb04b 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -23,12 +23,17 @@
static inline struct nf_conntrack_ecache *
nf_ct_ecache_find(const struct nf_conn *ct)
{
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
return nf_ct_ext_find(ct, NF_CT_EXT_ECACHE);
+#else
+ return NULL;
+#endif
}
static inline struct nf_conntrack_ecache *
nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp)
{
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
struct net *net = nf_ct_net(ct);
struct nf_conntrack_ecache *e;
@@ -45,6 +50,9 @@
e->expmask = expmask;
}
return e;
+#else
+ return NULL;
+#endif
};
#ifdef CONFIG_NF_CONNTRACK_EVENTS
@@ -59,7 +67,7 @@
int (*fcn)(unsigned int events, struct nf_ct_event *item);
};
-extern struct nf_ct_event_notifier *nf_conntrack_event_cb;
+extern struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
extern int nf_conntrack_register_notifier(struct nf_ct_event_notifier *nb);
extern void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *nb);
@@ -159,7 +167,7 @@
int (*fcn)(unsigned int events, struct nf_exp_event *item);
};
-extern struct nf_exp_event_notifier *nf_expect_event_cb;
+extern struct nf_exp_event_notifier __rcu *nf_expect_event_cb;
extern int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *nb);
extern void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *nb);
diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h
index 0772d29..2dcf317 100644
--- a/include/net/netfilter/nf_conntrack_extend.h
+++ b/include/net/netfilter/nf_conntrack_extend.h
@@ -7,10 +7,19 @@
enum nf_ct_ext_id {
NF_CT_EXT_HELPER,
+#if defined(CONFIG_NF_NAT) || defined(CONFIG_NF_NAT_MODULE)
NF_CT_EXT_NAT,
+#endif
NF_CT_EXT_ACCT,
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
NF_CT_EXT_ECACHE,
+#endif
+#ifdef CONFIG_NF_CONNTRACK_ZONES
NF_CT_EXT_ZONE,
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+ NF_CT_EXT_TSTAMP,
+#endif
NF_CT_EXT_NUM,
};
@@ -19,6 +28,7 @@
#define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter
#define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache
#define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone
+#define NF_CT_EXT_TSTAMP_TYPE struct nf_conn_tstamp
/* Extensions: optional stuff which isn't permanently in struct. */
struct nf_ct_ext {
diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index 32c305d..f1c1311 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -63,4 +63,10 @@
extern int nf_conntrack_helper_init(void);
extern void nf_conntrack_helper_fini(void);
+extern int nf_conntrack_broadcast_help(struct sk_buff *skb,
+ unsigned int protoff,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int timeout);
+
#endif /*_NF_CONNTRACK_HELPER_H*/
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index a754761..e8010f4 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -73,7 +73,7 @@
struct module *me;
};
-extern struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX];
+extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX];
/* Protocol registration. */
extern int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto);
diff --git a/include/net/netfilter/nf_conntrack_timestamp.h b/include/net/netfilter/nf_conntrack_timestamp.h
new file mode 100644
index 0000000..fc9c82b
--- /dev/null
+++ b/include/net/netfilter/nf_conntrack_timestamp.h
@@ -0,0 +1,65 @@
+#ifndef _NF_CONNTRACK_TSTAMP_H
+#define _NF_CONNTRACK_TSTAMP_H
+
+#include <net/net_namespace.h>
+#include <linux/netfilter/nf_conntrack_common.h>
+#include <linux/netfilter/nf_conntrack_tuple_common.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+
+struct nf_conn_tstamp {
+ u_int64_t start;
+ u_int64_t stop;
+};
+
+static inline
+struct nf_conn_tstamp *nf_conn_tstamp_find(const struct nf_conn *ct)
+{
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+ return nf_ct_ext_find(ct, NF_CT_EXT_TSTAMP);
+#else
+ return NULL;
+#endif
+}
+
+static inline
+struct nf_conn_tstamp *nf_ct_tstamp_ext_add(struct nf_conn *ct, gfp_t gfp)
+{
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+ struct net *net = nf_ct_net(ct);
+
+ if (!net->ct.sysctl_tstamp)
+ return NULL;
+
+ return nf_ct_ext_add(ct, NF_CT_EXT_TSTAMP, gfp);
+#else
+ return NULL;
+#endif
+};
+
+static inline bool nf_ct_tstamp_enabled(struct net *net)
+{
+ return net->ct.sysctl_tstamp != 0;
+}
+
+static inline void nf_ct_set_tstamp(struct net *net, bool enable)
+{
+ net->ct.sysctl_tstamp = enable;
+}
+
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+extern int nf_conntrack_tstamp_init(struct net *net);
+extern void nf_conntrack_tstamp_fini(struct net *net);
+#else
+static inline int nf_conntrack_tstamp_init(struct net *net)
+{
+ return 0;
+}
+
+static inline void nf_conntrack_tstamp_fini(struct net *net)
+{
+ return;
+}
+#endif /* CONFIG_NF_CONNTRACK_TIMESTAMP */
+
+#endif /* _NF_CONNTRACK_TSTAMP_H */
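
With this extension a flow's first and last packet times travel with the conntrack entry itself. An illustrative consumer, assuming the fields hold nanosecond timestamps (which the 64-bit width suggests); not code from this patch:

	static u64 ct_lifetime_ns(const struct nf_conn *ct)
	{
		const struct nf_conn_tstamp *tstamp = nf_conn_tstamp_find(ct);

		if (!tstamp || !tstamp->stop)
			return 0;	/* no extension, or flow still open */
		return tstamp->stop - tstamp->start;
	}
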
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index f5f09f0..aff80b1 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -56,7 +56,9 @@
/* per conntrack: nat application helper private data */
union nf_conntrack_nat_help {
/* insert nat helper private data here */
+#if defined(CONFIG_NF_NAT_PPTP) || defined(CONFIG_NF_NAT_PPTP_MODULE)
struct nf_nat_pptp nat_pptp_info;
+#endif
};
struct nf_conn;
@@ -84,7 +86,11 @@
static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct)
{
+#if defined(CONFIG_NF_NAT) || defined(CONFIG_NF_NAT_MODULE)
return nf_ct_ext_find(ct, NF_CT_EXT_NAT);
+#else
+ return NULL;
+#endif
}
#else /* !__KERNEL__: iptables wants this to compile. */
diff --git a/include/net/netfilter/nf_nat_core.h b/include/net/netfilter/nf_nat_core.h
index 33602ab..3dc7b98 100644
--- a/include/net/netfilter/nf_nat_core.h
+++ b/include/net/netfilter/nf_nat_core.h
@@ -21,9 +21,9 @@
enum nf_nat_manip_type manip)
{
if (manip == IP_NAT_MANIP_SRC)
- return test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
+ return ct->status & IPS_SRC_NAT_DONE;
else
- return test_bit(IPS_DST_NAT_DONE_BIT, &ct->status);
+ return ct->status & IPS_DST_NAT_DONE;
}
struct nlattr;
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index d4958d4..341eb08 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -21,15 +21,15 @@
int sysctl_events;
unsigned int sysctl_events_retry_timeout;
int sysctl_acct;
+ int sysctl_tstamp;
int sysctl_checksum;
unsigned int sysctl_log_invalid; /* Log invalid packets */
#ifdef CONFIG_SYSCTL
struct ctl_table_header *sysctl_header;
struct ctl_table_header *acct_sysctl_header;
+ struct ctl_table_header *tstamp_sysctl_header;
struct ctl_table_header *event_sysctl_header;
#endif
- int hash_vmalloc;
- int expect_vmalloc;
char *slabname;
};
#endif
diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h
new file mode 100644
index 0000000..259ebac
--- /dev/null
+++ b/include/net/netns/ip_vs.h
@@ -0,0 +1,143 @@
+/*
+ * IP Virtual Server
+ * Data structure for network namespace
+ *
+ */
+
+#ifndef IP_VS_H_
+#define IP_VS_H_
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/list_nulls.h>
+#include <linux/ip_vs.h>
+#include <asm/atomic.h>
+#include <linux/in.h>
+
+struct ip_vs_stats;
+struct ip_vs_sync_buff;
+struct ctl_table_header;
+
+struct netns_ipvs {
+ int gen; /* Generation */
+ /*
+ * Hash table: for real service lookups
+ */
+ #define IP_VS_RTAB_BITS 4
+ #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
+ #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
+
+ struct list_head rs_table[IP_VS_RTAB_SIZE];
+ /* ip_vs_app */
+ struct list_head app_list;
+ struct mutex app_mutex;
+ struct lock_class_key app_key; /* mutex debugging */
+
+ /* ip_vs_proto */
+ #define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */
+ struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];
+ /* ip_vs_proto_tcp */
+#ifdef CONFIG_IP_VS_PROTO_TCP
+ #define TCP_APP_TAB_BITS 4
+ #define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS)
+ #define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1)
+ struct list_head tcp_apps[TCP_APP_TAB_SIZE];
+ spinlock_t tcp_app_lock;
+#endif
+ /* ip_vs_proto_udp */
+#ifdef CONFIG_IP_VS_PROTO_UDP
+ #define UDP_APP_TAB_BITS 4
+ #define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)
+ #define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)
+ struct list_head udp_apps[UDP_APP_TAB_SIZE];
+ spinlock_t udp_app_lock;
+#endif
+ /* ip_vs_proto_sctp */
+#ifdef CONFIG_IP_VS_PROTO_SCTP
+ #define SCTP_APP_TAB_BITS 4
+ #define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS)
+ #define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1)
+ /* Hash table for SCTP application incarnations */
+ struct list_head sctp_apps[SCTP_APP_TAB_SIZE];
+ spinlock_t sctp_app_lock;
+#endif
+ /* ip_vs_conn */
+ atomic_t conn_count; /* connection counter */
+
+ /* ip_vs_ctl */
+ struct ip_vs_stats *tot_stats; /* Statistics & est. */
+ struct ip_vs_cpu_stats __percpu *cpustats; /* Stats per cpu */
+ seqcount_t *ustats_seq; /* u64 read retry */
+
+ int num_services; /* no of virtual services */
+ /* 1/rate drop and drop-entry variables */
+ struct delayed_work defense_work; /* Work handler */
+ int drop_rate;
+ int drop_counter;
+ atomic_t dropentry;
+ /* locks in ctl.c */
+ spinlock_t dropentry_lock; /* drop entry handling */
+ spinlock_t droppacket_lock; /* drop packet handling */
+ spinlock_t securetcp_lock; /* state and timeout tables */
+ rwlock_t rs_lock; /* real services table */
+ /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
+ struct lock_class_key ctl_key; /* ctl_mutex debugging */
+ /* Trash for destinations */
+ struct list_head dest_trash;
+ /* Service counters */
+ atomic_t ftpsvc_counter;
+ atomic_t nullsvc_counter;
+
+ /* sys-ctl struct */
+ struct ctl_table_header *sysctl_hdr;
+ struct ctl_table *sysctl_tbl;
+ /* sysctl variables */
+ int sysctl_amemthresh;
+ int sysctl_am_droprate;
+ int sysctl_drop_entry;
+ int sysctl_drop_packet;
+ int sysctl_secure_tcp;
+#ifdef CONFIG_IP_VS_NFCT
+ int sysctl_conntrack;
+#endif
+ int sysctl_snat_reroute;
+ int sysctl_sync_ver;
+ int sysctl_cache_bypass;
+ int sysctl_expire_nodest_conn;
+ int sysctl_expire_quiescent_template;
+ int sysctl_sync_threshold[2];
+ int sysctl_nat_icmp_send;
+
+ /* ip_vs_lblc */
+ int sysctl_lblc_expiration;
+ struct ctl_table_header *lblc_ctl_header;
+ struct ctl_table *lblc_ctl_table;
+ /* ip_vs_lblcr */
+ int sysctl_lblcr_expiration;
+ struct ctl_table_header *lblcr_ctl_header;
+ struct ctl_table *lblcr_ctl_table;
+ /* ip_vs_est */
+ struct list_head est_list; /* estimator list */
+ spinlock_t est_lock;
+ struct timer_list est_timer; /* Estimation timer */
+ /* ip_vs_sync */
+ struct list_head sync_queue;
+ spinlock_t sync_lock;
+ struct ip_vs_sync_buff *sync_buff;
+ spinlock_t sync_buff_lock;
+ struct sockaddr_in sync_mcast_addr;
+ struct task_struct *master_thread;
+ struct task_struct *backup_thread;
+ int send_mesg_maxlen;
+ int recv_mesg_maxlen;
+ volatile int sync_state;
+ volatile int master_syncid;
+ volatile int backup_syncid;
+ /* multicast interface name */
+ char master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+ char backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+ /* net name space ptr */
+ struct net *net; /* Needed by timer routines */
+};
+
+#endif /* IP_VS_H_ */
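
Everything in this struct hangs off struct net through the new ipvs pointer (see the net_namespace.h hunk below). A minimal sketch of the pernet wiring, with hypothetical ipvs_net_* names; the real per-net init initialises far more of this struct:

	static int __net_init ipvs_net_init(struct net *net)
	{
		struct netns_ipvs *ipvs = kzalloc(sizeof(*ipvs), GFP_KERNEL);

		if (!ipvs)
			return -ENOMEM;
		ipvs->net = net;	/* back-pointer for timer routines */
		net->ipvs = ipvs;
		return 0;
	}

	static void __net_exit ipvs_net_exit(struct net *net)
	{
		kfree(net_ipvs(net));
		net->ipvs = NULL;
	}

	static struct pernet_operations ipvs_net_ops = {
		.init = ipvs_net_init,
		.exit = ipvs_net_exit,
	};

registered once at module init with register_pernet_subsys(&ipvs_net_ops).
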
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index d68c3f1..e2e2ef5 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -43,7 +43,6 @@
struct xt_table *nat_table;
struct hlist_head *nat_bysource;
unsigned int nat_htable_size;
- int nat_vmalloced;
#endif
int sysctl_icmp_echo_ignore_all;
diff --git a/include/net/protocol.h b/include/net/protocol.h
index dc07495..6f7eb80 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -38,7 +38,7 @@
void (*err_handler)(struct sk_buff *skb, u32 info);
int (*gso_send_check)(struct sk_buff *skb);
struct sk_buff *(*gso_segment)(struct sk_buff *skb,
- int features);
+ u32 features);
struct sk_buff **(*gro_receive)(struct sk_buff **head,
struct sk_buff *skb);
int (*gro_complete)(struct sk_buff *skb);
@@ -57,7 +57,7 @@
int (*gso_send_check)(struct sk_buff *skb);
struct sk_buff *(*gso_segment)(struct sk_buff *skb,
- int features);
+ u32 features);
struct sk_buff **(*gro_receive)(struct sk_buff **head,
struct sk_buff *skb);
int (*gro_complete)(struct sk_buff *skb);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 160a407..16626a0 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -31,10 +31,12 @@
* following bits are only changed while qdisc lock is held
*/
enum qdisc___state_t {
- __QDISC___STATE_RUNNING,
+ __QDISC___STATE_RUNNING = 1,
+ __QDISC___STATE_THROTTLED = 2,
};
struct qdisc_size_table {
+ struct rcu_head rcu;
struct list_head list;
struct tc_sizespec szopts;
int refcnt;
@@ -46,14 +48,13 @@
struct sk_buff * (*dequeue)(struct Qdisc *dev);
unsigned flags;
#define TCQ_F_BUILTIN 1
-#define TCQ_F_THROTTLED 2
-#define TCQ_F_INGRESS 4
-#define TCQ_F_CAN_BYPASS 8
-#define TCQ_F_MQROOT 16
+#define TCQ_F_INGRESS 2
+#define TCQ_F_CAN_BYPASS 4
+#define TCQ_F_MQROOT 8
#define TCQ_F_WARN_NONWC (1 << 16)
int padded;
struct Qdisc_ops *ops;
- struct qdisc_size_table *stab;
+ struct qdisc_size_table __rcu *stab;
struct list_head list;
u32 handle;
u32 parent;
@@ -78,25 +79,43 @@
unsigned long state;
struct sk_buff_head q;
struct gnet_stats_basic_packed bstats;
- unsigned long __state;
+ unsigned int __state;
struct gnet_stats_queue qstats;
struct rcu_head rcu_head;
spinlock_t busylock;
};
-static inline bool qdisc_is_running(struct Qdisc *qdisc)
+static inline bool qdisc_is_running(const struct Qdisc *qdisc)
{
- return test_bit(__QDISC___STATE_RUNNING, &qdisc->__state);
+ return (qdisc->__state & __QDISC___STATE_RUNNING) ? true : false;
}
static inline bool qdisc_run_begin(struct Qdisc *qdisc)
{
- return !__test_and_set_bit(__QDISC___STATE_RUNNING, &qdisc->__state);
+ if (qdisc_is_running(qdisc))
+ return false;
+ qdisc->__state |= __QDISC___STATE_RUNNING;
+ return true;
}
static inline void qdisc_run_end(struct Qdisc *qdisc)
{
- __clear_bit(__QDISC___STATE_RUNNING, &qdisc->__state);
+ qdisc->__state &= ~__QDISC___STATE_RUNNING;
+}
+
+static inline bool qdisc_is_throttled(const struct Qdisc *qdisc)
+{
+ return (qdisc->__state & __QDISC___STATE_THROTTLED) ? true : false;
+}
+
+static inline void qdisc_throttled(struct Qdisc *qdisc)
+{
+ qdisc->__state |= __QDISC___STATE_THROTTLED;
+}
+
+static inline void qdisc_unthrottled(struct Qdisc *qdisc)
+{
+ qdisc->__state &= ~__QDISC___STATE_THROTTLED;
}
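
With __state shrunk to an int that is only touched under the qdisc spinlock, the old atomic bitops collapse into plain read-modify-writes, and TCQ_F_THROTTLED migrates from the flags word into the same field. The caller pattern these helpers serve, sketched:

	/* transmit path (cf. qdisc_run()): */
	if (qdisc_run_begin(q))
		__qdisc_run(q);		/* calls qdisc_run_end() when done */

	/* a rate-limiting qdisc parking itself until its watchdog fires: */
	qdisc_throttled(q);		/* was: q->flags |= TCQ_F_THROTTLED */
	/* ... later, from the watchdog ... */
	qdisc_unthrottled(q);
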
struct Qdisc_class_ops {
@@ -331,8 +350,8 @@
struct Qdisc_ops *ops);
extern struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
struct Qdisc_ops *ops, u32 parentid);
-extern void qdisc_calculate_pkt_len(struct sk_buff *skb,
- struct qdisc_size_table *stab);
+extern void __qdisc_calculate_pkt_len(struct sk_buff *skb,
+ const struct qdisc_size_table *stab);
extern void tcf_destroy(struct tcf_proto *tp);
extern void tcf_destroy_chain(struct tcf_proto **fl);
@@ -411,12 +430,20 @@
#define net_xmit_drop_count(e) (1)
#endif
-static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+static inline void qdisc_calculate_pkt_len(struct sk_buff *skb,
+ const struct Qdisc *sch)
{
#ifdef CONFIG_NET_SCHED
- if (sch->stab)
- qdisc_calculate_pkt_len(skb, sch->stab);
+ struct qdisc_size_table *stab = rcu_dereference_bh(sch->stab);
+
+ if (stab)
+ __qdisc_calculate_pkt_len(skb, stab);
#endif
+}
+
+static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+ qdisc_calculate_pkt_len(skb, sch);
return sch->enqueue(skb, sch);
}
diff --git a/include/net/sock.h b/include/net/sock.h
index d884d26..ba6465b 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1189,7 +1189,7 @@
static inline void sk_filter_release(struct sk_filter *fp)
{
if (atomic_dec_and_test(&fp->refcnt))
- call_rcu_bh(&fp->rcu, sk_filter_release_rcu);
+ call_rcu(&fp->rcu, sk_filter_release_rcu);
}
static inline void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 38509f0..9179111 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1404,7 +1404,7 @@
extern void tcp_v4_destroy_sock(struct sock *sk);
extern int tcp_v4_gso_send_check(struct sk_buff *skb);
-extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features);
+extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features);
extern struct sk_buff **tcp_gro_receive(struct sk_buff **head,
struct sk_buff *skb);
extern struct sk_buff **tcp4_gro_receive(struct sk_buff **head,
diff --git a/include/net/udp.h b/include/net/udp.h
index bb967dd..e82f3a8 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -245,5 +245,5 @@
extern void udp_init(void);
extern int udp4_ufo_send_check(struct sk_buff *skb);
-extern struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int features);
+extern struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, u32 features);
#endif /* _UDP_H */
diff --git a/kernel/audit.c b/kernel/audit.c
index e495624..162e88e 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -74,6 +74,8 @@
int audit_enabled;
int audit_ever_enabled;
+EXPORT_SYMBOL_GPL(audit_enabled);
+
/* Default state when kernel boots without any parameters. */
static int audit_default;
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 0caa59f..0587c5c 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -134,6 +134,10 @@
irq_set_thread_affinity(desc);
}
#endif
+ if (desc->affinity_notify) {
+ kref_get(&desc->affinity_notify->kref);
+ schedule_work(&desc->affinity_notify->work);
+ }
desc->status |= IRQ_AFFINITY_SET;
raw_spin_unlock_irqrestore(&desc->lock, flags);
return 0;
@@ -155,6 +159,79 @@
}
EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
+static void irq_affinity_notify(struct work_struct *work)
+{
+ struct irq_affinity_notify *notify =
+ container_of(work, struct irq_affinity_notify, work);
+ struct irq_desc *desc = irq_to_desc(notify->irq);
+ cpumask_var_t cpumask;
+ unsigned long flags;
+
+ if (!desc)
+ goto out;
+
+ if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
+ goto out;
+
+ raw_spin_lock_irqsave(&desc->lock, flags);
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+ if (desc->status & IRQ_MOVE_PENDING)
+ cpumask_copy(cpumask, desc->pending_mask);
+ else
+#endif
+ cpumask_copy(cpumask, desc->affinity);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
+
+ notify->notify(notify, cpumask);
+
+ free_cpumask_var(cpumask);
+out:
+ kref_put(&notify->kref, notify->release);
+}
+
+/**
+ * irq_set_affinity_notifier - control notification of IRQ affinity changes
+ * @irq: Interrupt for which to enable/disable notification
+ * @notify: Context for notification, or %NULL to disable
+ * notification. Function pointers must be initialised;
+ * the other fields will be initialised by this function.
+ *
+ * Must be called in process context. Notification may only be enabled
+ * after the IRQ is allocated and must be disabled before the IRQ is
+ * freed using free_irq().
+ */
+int
+irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
+{
+ struct irq_desc *desc = irq_to_desc(irq);
+ struct irq_affinity_notify *old_notify;
+ unsigned long flags;
+
+ /* The release function is promised process context */
+ might_sleep();
+
+ if (!desc)
+ return -EINVAL;
+
+ /* Complete initialisation of *notify */
+ if (notify) {
+ notify->irq = irq;
+ kref_init(&notify->kref);
+ INIT_WORK(&notify->work, irq_affinity_notify);
+ }
+
+ raw_spin_lock_irqsave(&desc->lock, flags);
+ old_notify = desc->affinity_notify;
+ desc->affinity_notify = notify;
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
+
+ if (old_notify)
+ kref_put(&old_notify->kref, old_notify->release);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(irq_set_affinity_notifier);
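
A consumer embeds struct irq_affinity_notify, fills in the two callbacks, and registers it; the kernel then schedules notify->work on every affinity change and drops the kref through release when the notifier is replaced or torn down. A hedged driver-side sketch; all foo_* names are hypothetical:

	struct foo_irq_ctx {
		struct irq_affinity_notify notify;
		/* per-IRQ driver state ... */
	};

	static void foo_affinity_notify(struct irq_affinity_notify *notify,
					const cpumask_t *mask)
	{
		struct foo_irq_ctx *ctx =
			container_of(notify, struct foo_irq_ctx, notify);

		/* re-steer ctx's per-CPU resources toward 'mask' */
	}

	static void foo_affinity_release(struct kref *ref)
	{
		struct irq_affinity_notify *notify =
			container_of(ref, struct irq_affinity_notify, kref);

		kfree(container_of(notify, struct foo_irq_ctx, notify));
	}

	/* setup:    ctx->notify.notify  = foo_affinity_notify;
	 *           ctx->notify.release = foo_affinity_release;
	 *           irq_set_affinity_notifier(irq, &ctx->notify);
	 * teardown: irq_set_affinity_notifier(irq, NULL) before free_irq().
	 */
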
+
#ifndef CONFIG_AUTO_IRQ_AFFINITY
/*
* Generic version of the affinity autoselector.
@@ -1004,6 +1081,11 @@
if (!desc)
return;
+#ifdef CONFIG_SMP
+ if (WARN_ON(desc->affinity_notify))
+ desc->affinity_notify = NULL;
+#endif
+
chip_bus_lock(desc);
kfree(__free_irq(irq, dev_id));
chip_bus_sync_unlock(desc);
diff --git a/lib/Kconfig b/lib/Kconfig
index 0ee67e0..8334342 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -201,6 +201,10 @@
bool "Disable obsolete cpumask functions" if DEBUG_PER_CPU_MAPS
depends on EXPERIMENTAL && BROKEN
+config CPU_RMAP
+ bool
+ depends on SMP
+
#
# Netlink attribute parsing support is select'ed if needed
#
diff --git a/lib/Makefile b/lib/Makefile
index cbb774f..b73ba01 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -110,6 +110,8 @@
obj-$(CONFIG_AVERAGE) += average.o
+obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o
+
hostprogs-y := gen_crc32table
clean-files := crc32table.h
diff --git a/lib/cpu_rmap.c b/lib/cpu_rmap.c
new file mode 100644
index 0000000..987acfa
--- /dev/null
+++ b/lib/cpu_rmap.c
@@ -0,0 +1,269 @@
+/*
+ * cpu_rmap.c: CPU affinity reverse-map support
+ * Copyright 2011 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/cpu_rmap.h>
+#ifdef CONFIG_GENERIC_HARDIRQS
+#include <linux/interrupt.h>
+#endif
+#include <linux/module.h>
+
+/*
+ * These functions maintain a mapping from CPUs to some ordered set of
+ * objects with CPU affinities. This can be seen as a reverse-map of
+ * CPU affinity. However, we do not assume that the object affinities
+ * cover all CPUs in the system. For those CPUs not directly covered
+ * by object affinities, we attempt to find a nearest object based on
+ * CPU topology.
+ */
+
+/**
+ * alloc_cpu_rmap - allocate CPU affinity reverse-map
+ * @size: Number of objects to be mapped
+ * @flags: Allocation flags e.g. %GFP_KERNEL
+ */
+struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags)
+{
+ struct cpu_rmap *rmap;
+ unsigned int cpu;
+ size_t obj_offset;
+
+ /* This is a silly number of objects, and we use u16 indices. */
+ if (size > 0xffff)
+ return NULL;
+
+ /* Offset of object pointer array from base structure */
+ obj_offset = ALIGN(offsetof(struct cpu_rmap, near[nr_cpu_ids]),
+ sizeof(void *));
+
+ rmap = kzalloc(obj_offset + size * sizeof(rmap->obj[0]), flags);
+ if (!rmap)
+ return NULL;
+
+ rmap->obj = (void **)((char *)rmap + obj_offset);
+
+ /* Initially assign CPUs to objects on a rota, since we have
+ * no idea where the objects are. Use infinite distance, so
+ * any object with known distance is preferable. Include the
+ * CPUs that are not present/online, since we definitely want
+ * any newly-hotplugged CPUs to have some object assigned.
+ */
+ for_each_possible_cpu(cpu) {
+ rmap->near[cpu].index = cpu % size;
+ rmap->near[cpu].dist = CPU_RMAP_DIST_INF;
+ }
+
+ rmap->size = size;
+ return rmap;
+}
+EXPORT_SYMBOL(alloc_cpu_rmap);
+
+/* Reevaluate nearest object for given CPU, comparing with the given
+ * neighbours at the given distance.
+ */
+static bool cpu_rmap_copy_neigh(struct cpu_rmap *rmap, unsigned int cpu,
+ const struct cpumask *mask, u16 dist)
+{
+ int neigh;
+
+ for_each_cpu(neigh, mask) {
+ if (rmap->near[cpu].dist > dist &&
+ rmap->near[neigh].dist <= dist) {
+ rmap->near[cpu].index = rmap->near[neigh].index;
+ rmap->near[cpu].dist = dist;
+ return true;
+ }
+ }
+ return false;
+}
+
+#ifdef DEBUG
+static void debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
+{
+ unsigned index;
+ unsigned int cpu;
+
+ pr_info("cpu_rmap %p, %s:\n", rmap, prefix);
+
+ for_each_possible_cpu(cpu) {
+ index = rmap->near[cpu].index;
+ pr_info("cpu %d -> obj %u (distance %u)\n",
+ cpu, index, rmap->near[cpu].dist);
+ }
+}
+#else
+static inline void
+debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
+{
+}
+#endif
+
+/**
+ * cpu_rmap_add - add object to a rmap
+ * @rmap: CPU rmap allocated with alloc_cpu_rmap()
+ * @obj: Object to add to rmap
+ *
+ * Return index of object.
+ */
+int cpu_rmap_add(struct cpu_rmap *rmap, void *obj)
+{
+ u16 index;
+
+ BUG_ON(rmap->used >= rmap->size);
+ index = rmap->used++;
+ rmap->obj[index] = obj;
+ return index;
+}
+EXPORT_SYMBOL(cpu_rmap_add);
+
+/**
+ * cpu_rmap_update - update CPU rmap following a change of object affinity
+ * @rmap: CPU rmap to update
+ * @index: Index of object whose affinity changed
+ * @affinity: New CPU affinity of object
+ */
+int cpu_rmap_update(struct cpu_rmap *rmap, u16 index,
+ const struct cpumask *affinity)
+{
+ cpumask_var_t update_mask;
+ unsigned int cpu;
+
+ if (unlikely(!zalloc_cpumask_var(&update_mask, GFP_KERNEL)))
+ return -ENOMEM;
+
+ /* Invalidate distance for all CPUs for which this used to be
+ * the nearest object. Mark those CPUs for update.
+ */
+ for_each_online_cpu(cpu) {
+ if (rmap->near[cpu].index == index) {
+ rmap->near[cpu].dist = CPU_RMAP_DIST_INF;
+ cpumask_set_cpu(cpu, update_mask);
+ }
+ }
+
+ debug_print_rmap(rmap, "after invalidating old distances");
+
+ /* Set distance to 0 for all CPUs in the new affinity mask.
+ * Mark all CPUs within their NUMA nodes for update.
+ */
+ for_each_cpu(cpu, affinity) {
+ rmap->near[cpu].index = index;
+ rmap->near[cpu].dist = 0;
+ cpumask_or(update_mask, update_mask,
+ cpumask_of_node(cpu_to_node(cpu)));
+ }
+
+ debug_print_rmap(rmap, "after updating neighbours");
+
+ /* Update distances based on topology */
+ for_each_cpu(cpu, update_mask) {
+ if (cpu_rmap_copy_neigh(rmap, cpu,
+ topology_thread_cpumask(cpu), 1))
+ continue;
+ if (cpu_rmap_copy_neigh(rmap, cpu,
+ topology_core_cpumask(cpu), 2))
+ continue;
+ if (cpu_rmap_copy_neigh(rmap, cpu,
+ cpumask_of_node(cpu_to_node(cpu)), 3))
+ continue;
+ /* We could continue into NUMA node distances, but for now
+ * we give up.
+ */
+ }
+
+ debug_print_rmap(rmap, "after copying neighbours");
+
+ free_cpumask_var(update_mask);
+ return 0;
+}
+EXPORT_SYMBOL(cpu_rmap_update);
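/* Illustrative trace of the ranked passes above, on a hypothetical
 * 2-socket box (CPUs 0 and 4 are SMT siblings, CPUs 0-3 share NUMA
 * node 0) after object 0's affinity moves to CPU 0:
 *
 *   cpu 0 -> obj 0, dist 0  (in the new affinity mask)
 *   cpu 4 -> obj 0, dist 1  (SMT sibling: topology_thread_cpumask)
 *   cpu 1 -> obj 0, dist 2  (same package: topology_core_cpumask)
 *   cpu 2 -> obj 0, dist 3  (same node:    cpumask_of_node)
 *   cpu 8 -> unchanged      (other node keeps its previous nearest)
 *
 * cpu_rmap_copy_neigh() only copies when the neighbour is at least as
 * close as the rank being tried and the CPU itself is farther, so
 * lower distances always win.
 */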
+
+#ifdef CONFIG_GENERIC_HARDIRQS
+
+/* Glue between IRQ affinity notifiers and CPU rmaps */
+
+struct irq_glue {
+ struct irq_affinity_notify notify;
+ struct cpu_rmap *rmap;
+ u16 index;
+};
+
+/**
+ * free_irq_cpu_rmap - free a CPU affinity reverse-map used for IRQs
+ * @rmap: Reverse-map allocated with alloc_irq_cpu_rmap(), or %NULL
+ *
+ * Must be called in process context, before freeing the IRQs, and
+ * without holding any locks required by global workqueue items.
+ */
+void free_irq_cpu_rmap(struct cpu_rmap *rmap)
+{
+ struct irq_glue *glue;
+ u16 index;
+
+ if (!rmap)
+ return;
+
+ for (index = 0; index < rmap->used; index++) {
+ glue = rmap->obj[index];
+ irq_set_affinity_notifier(glue->notify.irq, NULL);
+ }
+ irq_run_affinity_notifiers();
+
+ kfree(rmap);
+}
+EXPORT_SYMBOL(free_irq_cpu_rmap);
+
+static void
+irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask)
+{
+ struct irq_glue *glue =
+ container_of(notify, struct irq_glue, notify);
+ int rc;
+
+ rc = cpu_rmap_update(glue->rmap, glue->index, mask);
+ if (rc)
+ pr_warning("irq_cpu_rmap_notify: update failed: %d\n", rc);
+}
+
+static void irq_cpu_rmap_release(struct kref *ref)
+{
+ struct irq_glue *glue =
+ container_of(ref, struct irq_glue, notify.kref);
+ kfree(glue);
+}
+
+/**
+ * irq_cpu_rmap_add - add an IRQ to a CPU affinity reverse-map
+ * @rmap: The reverse-map
+ * @irq: The IRQ number
+ *
+ * This adds an IRQ affinity notifier that will update the reverse-map
+ * automatically.
+ *
+ * Must be called in process context, after the IRQ is allocated but
+ * before it is bound with request_irq().
+ */
+int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq)
+{
+ struct irq_glue *glue = kzalloc(sizeof(*glue), GFP_KERNEL);
+ int rc;
+
+ if (!glue)
+ return -ENOMEM;
+ glue->notify.notify = irq_cpu_rmap_notify;
+ glue->notify.release = irq_cpu_rmap_release;
+ glue->rmap = rmap;
+ glue->index = cpu_rmap_add(rmap, glue);
+ rc = irq_set_affinity_notifier(irq, &glue->notify);
+ if (rc)
+ kfree(glue);
+ return rc;
+}
+EXPORT_SYMBOL(irq_cpu_rmap_add);
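/* Driver-side usage sketch (hypothetical driver; assumes the
 * alloc_irq_cpu_rmap() helper referenced in the kernel-doc above and
 * the rx_cpu_rmap field that CONFIG_RFS_ACCEL adds to struct
 * net_device): build the map once, register every RX vector before
 * request_irq(), and free the map before the IRQs themselves.
 */
static int example_setup_rx_rmap(struct net_device *dev,
				 const int *irq, unsigned int n_rx)
{
	unsigned int i;
	int rc;

	dev->rx_cpu_rmap = alloc_irq_cpu_rmap(n_rx);
	if (!dev->rx_cpu_rmap)
		return -ENOMEM;

	for (i = 0; i < n_rx; i++) {
		rc = irq_cpu_rmap_add(dev->rx_cpu_rmap, irq[i]);
		if (rc) {
			free_irq_cpu_rmap(dev->rx_cpu_rmap);
			dev->rx_cpu_rmap = NULL;
			return rc;
		}
	}
	return 0;	/* request_irq() for each vector follows */
}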
+
+#endif /* CONFIG_GENERIC_HARDIRQS */
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 6e64f7c..7850412 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -327,7 +327,7 @@
static void vlan_transfer_features(struct net_device *dev,
struct net_device *vlandev)
{
- unsigned long old_features = vlandev->features;
+ u32 old_features = vlandev->features;
vlandev->features &= ~dev->vlan_features;
vlandev->features |= dev->features & dev->vlan_features;
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 17c5ba7..29a54cc 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -59,7 +59,6 @@
* safely advertise a maxsize
* of 64k */
-#define P9_RDMA_MAX_SGE (P9_RDMA_MAXSIZE >> PAGE_SHIFT)
/**
* struct p9_trans_rdma - RDMA transport instance
*
diff --git a/net/Kconfig b/net/Kconfig
index 7284062..79cabf1 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -221,6 +221,12 @@
depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
default y
+config RFS_ACCEL
+ boolean
+ depends on RPS && GENERIC_HARDIRQS
+ select CPU_RMAP
+ default y
+
config XPS
boolean
depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index d9d1e2b..2a6801d 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -365,7 +365,7 @@
void br_features_recompute(struct net_bridge *br)
{
struct net_bridge_port *p;
- unsigned long features, mask;
+ u32 features, mask;
features = mask = br->feature_mask;
if (list_empty(&br->port_list))
@@ -379,7 +379,7 @@
}
done:
- br->dev->features = netdev_fix_features(features, NULL);
+ br->dev->features = netdev_fix_features(br->dev, features);
}
/* called with RTNL */
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 84aac77..9f22898 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -182,7 +182,7 @@
struct br_cpu_netstats __percpu *stats;
spinlock_t hash_lock;
struct hlist_head hash[BR_HASH_SIZE];
- unsigned long feature_mask;
+ u32 feature_mask;
#ifdef CONFIG_BRIDGE_NETFILTER
struct rtable fake_rtable;
bool nf_call_iptables;
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index 50a46af..2ed0056 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -22,9 +22,15 @@
#include <linux/netfilter_bridge/ebtables.h>
#include <linux/netfilter_bridge/ebt_ip6.h>
-struct tcpudphdr {
- __be16 src;
- __be16 dst;
+union pkthdr {
+ struct {
+ __be16 src;
+ __be16 dst;
+ } tcpudphdr;
+ struct {
+ u8 type;
+ u8 code;
+ } icmphdr;
};
static bool
@@ -33,8 +39,8 @@
const struct ebt_ip6_info *info = par->matchinfo;
const struct ipv6hdr *ih6;
struct ipv6hdr _ip6h;
- const struct tcpudphdr *pptr;
- struct tcpudphdr _ports;
+ const union pkthdr *pptr;
+ union pkthdr _pkthdr;
ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h);
if (ih6 == NULL)
@@ -56,26 +62,34 @@
return false;
if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO))
return false;
- if (!(info->bitmask & EBT_IP6_DPORT) &&
- !(info->bitmask & EBT_IP6_SPORT))
+ if (!(info->bitmask & ( EBT_IP6_DPORT |
+ EBT_IP6_SPORT | EBT_IP6_ICMP6)))
return true;
- pptr = skb_header_pointer(skb, offset_ph, sizeof(_ports),
- &_ports);
+
+ /* min icmpv6 headersize is 4, so sizeof(_pkthdr) is ok. */
+ pptr = skb_header_pointer(skb, offset_ph, sizeof(_pkthdr),
+ &_pkthdr);
if (pptr == NULL)
return false;
if (info->bitmask & EBT_IP6_DPORT) {
- u32 dst = ntohs(pptr->dst);
+ u16 dst = ntohs(pptr->tcpudphdr.dst);
if (FWINV(dst < info->dport[0] ||
dst > info->dport[1], EBT_IP6_DPORT))
return false;
}
if (info->bitmask & EBT_IP6_SPORT) {
- u32 src = ntohs(pptr->src);
+ u16 src = ntohs(pptr->tcpudphdr.src);
if (FWINV(src < info->sport[0] ||
src > info->sport[1], EBT_IP6_SPORT))
return false;
}
- return true;
+ if ((info->bitmask & EBT_IP6_ICMP6) &&
+ FWINV(pptr->icmphdr.type < info->icmpv6_type[0] ||
+ pptr->icmphdr.type > info->icmpv6_type[1] ||
+ pptr->icmphdr.code < info->icmpv6_code[0] ||
+ pptr->icmphdr.code > info->icmpv6_code[1],
+ EBT_IP6_ICMP6))
+ return false;
+ return true;
}
@@ -103,6 +117,14 @@
return -EINVAL;
if (info->bitmask & EBT_IP6_SPORT && info->sport[0] > info->sport[1])
return -EINVAL;
+ if (info->bitmask & EBT_IP6_ICMP6) {
+ if ((info->invflags & EBT_IP6_PROTO) ||
+ info->protocol != IPPROTO_ICMPV6)
+ return -EINVAL;
+ if (info->icmpv6_type[0] > info->icmpv6_type[1] ||
+ info->icmpv6_code[0] > info->icmpv6_code[1])
+ return -EINVAL;
+ }
return 0;
}
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 16df053..5f1825d 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1764,6 +1764,7 @@
newinfo->entries_size = size;
+ xt_compat_init_offsets(AF_INET, info->nentries);
return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info,
entries, newinfo);
}
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index c665de7..f1f98d9 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -23,10 +23,8 @@
#include <asm/atomic.h>
#define MAX_PHY_LAYERS 7
-#define PHY_NAME_LEN 20
#define container_obj(layr) container_of(layr, struct cfcnfg, layer)
-#define RFM_FRAGMENT_SIZE 4030
/* Information about CAIF physical interfaces held by Config Module in order
* to manage physical interfaces
diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c
index d3ed264..27dab26 100644
--- a/net/caif/cfdgml.c
+++ b/net/caif/cfdgml.c
@@ -18,7 +18,6 @@
#define DGM_CMD_BIT 0x80
#define DGM_FLOW_OFF 0x81
#define DGM_FLOW_ON 0x80
-#define DGM_CTRL_PKT_SIZE 1
#define DGM_MTU 1500
static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt);
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c
index 9297f7d..8303fe3 100644
--- a/net/caif/cfserl.c
+++ b/net/caif/cfserl.c
@@ -25,7 +25,6 @@
spinlock_t sync;
bool usestx;
};
-#define STXLEN(layr) (layr->usestx ? 1 : 0)
static int cfserl_receive(struct cflayer *layr, struct cfpkt *pkt);
static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt);
diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c
index efad410..315c0d6 100644
--- a/net/caif/cfutill.c
+++ b/net/caif/cfutill.c
@@ -20,7 +20,7 @@
#define UTIL_REMOTE_SHUTDOWN 0x82
#define UTIL_FLOW_OFF 0x81
#define UTIL_FLOW_ON 0x80
-#define UTIL_CTRL_PKT_SIZE 1
+
static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt);
static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt);
diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c
index 3b425b1..c3b1dec 100644
--- a/net/caif/cfveil.c
+++ b/net/caif/cfveil.c
@@ -17,7 +17,7 @@
#define VEI_FLOW_OFF 0x81
#define VEI_FLOW_ON 0x80
#define VEI_SET_PIN 0x82
-#define VEI_CTRL_PKT_SIZE 1
+
#define container_obj(layr) container_of(layr, struct cfsrvl, layer)
static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt);
diff --git a/net/core/dev.c b/net/core/dev.c
index 24ea2d7..1b4c07f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -132,6 +132,7 @@
#include <trace/events/skb.h>
#include <linux/pci.h>
#include <linux/inetdevice.h>
+#include <linux/cpu_rmap.h>
#include "net-sysfs.h"
@@ -1286,7 +1287,7 @@
return __dev_close_many(&single);
}
-int dev_close_many(struct list_head *head)
+static int dev_close_many(struct list_head *head)
{
struct net_device *dev, *tmp;
LIST_HEAD(tmp_list);
@@ -1594,6 +1595,48 @@
rcu_read_unlock();
}
+/* netif_setup_tc - Handle tc mappings on real_num_tx_queues change
+ * @dev: Network device
+ * @txq: number of queues available
+ *
+ * If real_num_tx_queues is changed the tc mappings may no longer be
+ * valid. To resolve this verify the tc mapping remains valid and if
+ * not, NULL the mapping. With no priorities mapping to this
+ * offset/count pair it will no longer be used. In the worst case TC0
+ * is invalid and nothing can be done, so disable priority mappings.
+ * It is expected that drivers will fix this mapping if they can
+ * before calling netif_set_real_num_tx_queues.
+ */
+static void netif_setup_tc(struct net_device *dev, unsigned int txq)
+{
+ int i;
+ struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
+
+ /* If TC0 is invalidated disable TC mapping */
+ if (tc->offset + tc->count > txq) {
+ pr_warning("Number of in use tx queues changed "
+ "invalidating tc mappings. Priority "
+ "traffic classification disabled!\n");
+ dev->num_tc = 0;
+ return;
+ }
+
+ /* Invalidated prio to tc mappings set to TC0 */
+ for (i = 1; i < TC_BITMASK + 1; i++) {
+ int q = netdev_get_prio_tc_map(dev, i);
+
+ tc = &dev->tc_to_txq[q];
+ if (tc->offset + tc->count > txq) {
+ pr_warning("Number of in use tx queues "
+ "changed. Priority %i to tc "
+ "mapping %i is no longer valid "
+ "setting map to 0\n",
+ i, q);
+ netdev_set_prio_tc_map(dev, i, 0);
+ }
+ }
+}
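/* Failure mode handled above, by example (illustration only): a device
 * with 8 queues maps tc1 to offset 6, count 2, then shrinks to
 * txq = 6. tc1 would cover queues 6..7, which no longer exist, so any
 * priority pointing at tc1 is remapped to TC0. Had TC0 itself spilled
 * past txq, num_tc would be cleared and priority classification
 * disabled entirely.
 */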
+
/*
* Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
* greater than real_num_tx_queues, stale skbs on the qdisc must be flushed.
@@ -1613,6 +1656,9 @@
if (rc)
return rc;
+ if (dev->num_tc)
+ netif_setup_tc(dev, txq);
+
if (txq < dev->real_num_tx_queues)
qdisc_reset_all_tx_gt(dev, txq);
}
@@ -1812,7 +1858,7 @@
* It may return NULL if the skb requires no segmentation. This is
* only possible when GSO is used for verifying header integrity.
*/
-struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
+struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features)
{
struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
struct packet_type *ptype;
@@ -2000,7 +2046,7 @@
protocol == htons(ETH_P_FCOE)));
}
-static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features)
+static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features)
{
if (!can_checksum_protocol(features, protocol)) {
features &= ~NETIF_F_ALL_CSUM;
@@ -2012,10 +2058,10 @@
return features;
}
-int netif_skb_features(struct sk_buff *skb)
+u32 netif_skb_features(struct sk_buff *skb)
{
__be16 protocol = skb->protocol;
- int features = skb->dev->features;
+ u32 features = skb->dev->features;
if (protocol == htons(ETH_P_8021Q)) {
struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
@@ -2060,7 +2106,7 @@
int rc = NETDEV_TX_OK;
if (likely(!skb->next)) {
- int features;
+ u32 features;
/*
* If device doesn't need skb->dst, release it right now while
@@ -2162,6 +2208,8 @@
unsigned int num_tx_queues)
{
u32 hash;
+ u16 qoffset = 0;
+ u16 qcount = num_tx_queues;
if (skb_rx_queue_recorded(skb)) {
hash = skb_get_rx_queue(skb);
@@ -2170,13 +2218,19 @@
return hash;
}
+ if (dev->num_tc) {
+ u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
+ qoffset = dev->tc_to_txq[tc].offset;
+ qcount = dev->tc_to_txq[tc].count;
+ }
+
if (skb->sk && skb->sk->sk_hash)
hash = skb->sk->sk_hash;
else
hash = (__force u16) skb->protocol ^ skb->rxhash;
hash = jhash_1word(hash, hashrnd);
- return (u16) (((u64) hash * num_tx_queues) >> 32);
+ return (u16) (((u64) hash * qcount) >> 32) + qoffset;
}
EXPORT_SYMBOL(__skb_tx_hash);
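/* Worked example of the scaled hash above (illustration only): with
 * the skb's priority mapping to a traffic class that owns queues 8..11
 * (qoffset = 8, qcount = 4), a hash of 0xC0000000 yields
 *
 *   ((u64)0xC0000000 * 4) >> 32 = 3,  3 + 8 = queue 11
 *
 * so flows are spread across the class's own queue range rather than
 * the whole device, and never leak into another traffic class.
 */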
@@ -2273,15 +2327,18 @@
struct netdev_queue *txq)
{
spinlock_t *root_lock = qdisc_lock(q);
- bool contended = qdisc_is_running(q);
+ bool contended;
int rc;
+ qdisc_skb_cb(skb)->pkt_len = skb->len;
+ qdisc_calculate_pkt_len(skb, q);
/*
* Heuristic to force contended enqueues to serialize on a
* separate lock before trying to get qdisc main lock.
* This permits __QDISC_STATE_RUNNING owner to get the lock more often
* and dequeue packets faster.
*/
+ contended = qdisc_is_running(q);
if (unlikely(contended))
spin_lock(&q->busylock);
@@ -2299,7 +2356,6 @@
if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
skb_dst_force(skb);
- qdisc_skb_cb(skb)->pkt_len = skb->len;
qdisc_bstats_update(q, skb);
if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
@@ -2314,7 +2370,7 @@
rc = NET_XMIT_SUCCESS;
} else {
skb_dst_force(skb);
- rc = qdisc_enqueue_root(skb, q);
+ rc = q->enqueue(skb, q) & NET_XMIT_MASK;
if (qdisc_run_begin(q)) {
if (unlikely(contended)) {
spin_unlock(&q->busylock);
@@ -2533,6 +2589,53 @@
struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
EXPORT_SYMBOL(rps_sock_flow_table);
+static struct rps_dev_flow *
+set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
+ struct rps_dev_flow *rflow, u16 next_cpu)
+{
+ u16 tcpu;
+
+ tcpu = rflow->cpu = next_cpu;
+ if (tcpu != RPS_NO_CPU) {
+#ifdef CONFIG_RFS_ACCEL
+ struct netdev_rx_queue *rxqueue;
+ struct rps_dev_flow_table *flow_table;
+ struct rps_dev_flow *old_rflow;
+ u32 flow_id;
+ u16 rxq_index;
+ int rc;
+
+ /* Should we steer this flow to a different hardware queue? */
+ if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap)
+ goto out;
+ rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
+ if (rxq_index == skb_get_rx_queue(skb))
+ goto out;
+
+ rxqueue = dev->_rx + rxq_index;
+ flow_table = rcu_dereference(rxqueue->rps_flow_table);
+ if (!flow_table)
+ goto out;
+ flow_id = skb->rxhash & flow_table->mask;
+ rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
+ rxq_index, flow_id);
+ if (rc < 0)
+ goto out;
+ old_rflow = rflow;
+ rflow = &flow_table->flows[flow_id];
+ rflow->cpu = next_cpu;
+ rflow->filter = rc;
+ if (old_rflow->filter == rflow->filter)
+ old_rflow->filter = RPS_NO_FILTER;
+ out:
+#endif
+ rflow->last_qtail =
+ per_cpu(softnet_data, tcpu).input_queue_head;
+ }
+
+ return rflow;
+}
+
/*
* get_rps_cpu is called from netif_receive_skb and returns the target
* CPU from the RPS map of the receiving queue for a given skb.
@@ -2603,12 +2706,9 @@
if (unlikely(tcpu != next_cpu) &&
(tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
((int)(per_cpu(softnet_data, tcpu).input_queue_head -
- rflow->last_qtail)) >= 0)) {
- tcpu = rflow->cpu = next_cpu;
- if (tcpu != RPS_NO_CPU)
- rflow->last_qtail = per_cpu(softnet_data,
- tcpu).input_queue_head;
- }
+ rflow->last_qtail)) >= 0))
+ rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
+
if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
*rflowp = rflow;
cpu = tcpu;
@@ -2629,6 +2729,46 @@
return cpu;
}
+#ifdef CONFIG_RFS_ACCEL
+
+/**
+ * rps_may_expire_flow - check whether an RFS hardware filter may be removed
+ * @dev: Device on which the filter was set
+ * @rxq_index: RX queue index
+ * @flow_id: Flow ID passed to ndo_rx_flow_steer()
+ * @filter_id: Filter ID returned by ndo_rx_flow_steer()
+ *
+ * Drivers that implement ndo_rx_flow_steer() should periodically call
+ * this function for each installed filter and remove the filters for
+ * which it returns %true.
+ */
+bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
+ u32 flow_id, u16 filter_id)
+{
+ struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
+ struct rps_dev_flow_table *flow_table;
+ struct rps_dev_flow *rflow;
+ bool expire = true;
+ int cpu;
+
+ rcu_read_lock();
+ flow_table = rcu_dereference(rxqueue->rps_flow_table);
+ if (flow_table && flow_id <= flow_table->mask) {
+ rflow = &flow_table->flows[flow_id];
+ cpu = ACCESS_ONCE(rflow->cpu);
+ if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
+ ((int)(per_cpu(softnet_data, cpu).input_queue_head -
+ rflow->last_qtail) <
+ (int)(10 * flow_table->mask)))
+ expire = false;
+ }
+ rcu_read_unlock();
+ return expire;
+}
+EXPORT_SYMBOL(rps_may_expire_flow);
+
+#endif /* CONFIG_RFS_ACCEL */
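/* Sketch of the expiry pass a driver implementing ndo_rx_flow_steer()
 * might run from periodic work (struct example_nic and its filter
 * table are hypothetical; only rps_may_expire_flow() is from this
 * patch):
 */
static void example_expire_rfs(struct example_nic *nic)
{
	unsigned int i;

	for (i = 0; i < nic->n_filters; i++) {
		struct example_filter *f = &nic->filter[i];

		if (!f->installed)
			continue;
		if (rps_may_expire_flow(nic->netdev, f->rxq_index,
					f->flow_id, f->filter_id)) {
			example_remove_hw_filter(nic, f);
			f->installed = false;
		}
	}
}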
+
/* Called from hardirq (IPI) context */
static void rps_trigger_softirq(void *data)
{
@@ -4573,6 +4713,17 @@
EXPORT_SYMBOL(dev_set_mtu);
/**
+ * dev_set_group - Change group this device belongs to
+ * @dev: device
+ * @new_group: group this device should belong to
+ */
+void dev_set_group(struct net_device *dev, int new_group)
+{
+ dev->group = new_group;
+}
+EXPORT_SYMBOL(dev_set_group);
+
+/**
* dev_set_mac_address - Change Media Access Control Address
* @dev: device
* @sa: new address
@@ -5062,41 +5213,49 @@
rollback_registered_many(&single);
}
-unsigned long netdev_fix_features(unsigned long features, const char *name)
+u32 netdev_fix_features(struct net_device *dev, u32 features)
{
+ /* Fix illegal checksum combinations */
+ if ((features & NETIF_F_HW_CSUM) &&
+ (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
+ netdev_info(dev, "mixed HW and IP checksum settings.\n");
+ features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
+ }
+
+ if ((features & NETIF_F_NO_CSUM) &&
+ (features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
+ netdev_info(dev, "mixed no checksumming and other settings.\n");
+ features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
+ }
+
/* Fix illegal SG+CSUM combinations. */
if ((features & NETIF_F_SG) &&
!(features & NETIF_F_ALL_CSUM)) {
- if (name)
- printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
- "checksum feature.\n", name);
+ netdev_info(dev,
+ "Dropping NETIF_F_SG since no checksum feature.\n");
features &= ~NETIF_F_SG;
}
/* TSO requires that SG is present as well. */
if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
- if (name)
- printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
- "SG feature.\n", name);
+ netdev_info(dev, "Dropping NETIF_F_TSO since no SG feature.\n");
features &= ~NETIF_F_TSO;
}
+ /* UFO needs SG and checksumming */
if (features & NETIF_F_UFO) {
/* maybe split UFO into V4 and V6? */
if (!((features & NETIF_F_GEN_CSUM) ||
(features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
== (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
- if (name)
- printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
- "since no checksum offload features.\n",
- name);
+ netdev_info(dev,
+ "Dropping NETIF_F_UFO since no checksum offload features.\n");
features &= ~NETIF_F_UFO;
}
if (!(features & NETIF_F_SG)) {
- if (name)
- printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
- "since no NETIF_F_SG feature.\n", name);
+ netdev_info(dev,
+ "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
features &= ~NETIF_F_UFO;
}
}
@@ -5239,22 +5398,7 @@
if (dev->iflink == -1)
dev->iflink = dev->ifindex;
- /* Fix illegal checksum combinations */
- if ((dev->features & NETIF_F_HW_CSUM) &&
- (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
- printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
- dev->name);
- dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
- }
-
- if ((dev->features & NETIF_F_NO_CSUM) &&
- (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
- printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
- dev->name);
- dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
- }
-
- dev->features = netdev_fix_features(dev->features, dev->name);
+ dev->features = netdev_fix_features(dev, dev->features);
/* Enable software GSO if SG is supported. */
if (dev->features & NETIF_F_SG)
@@ -5679,6 +5823,7 @@
dev->priv_flags = IFF_XMIT_DST_RELEASE;
setup(dev);
strcpy(dev->name, name);
+ dev->group = INIT_NETDEV_GROUP;
return dev;
free_pcpu:
@@ -5989,8 +6134,7 @@
* @one to the master device with current feature set @all. Will not
* enable anything that is off in @mask. Returns the new feature set.
*/
-unsigned long netdev_increment_features(unsigned long all, unsigned long one,
- unsigned long mask)
+u32 netdev_increment_features(u32 all, u32 one, u32 mask)
{
/* If device needs checksumming, downgrade to it. */
if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index ff23029..5984ee0 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1458,7 +1458,7 @@
void __user *useraddr = ifr->ifr_data;
u32 ethcmd;
int rc;
- unsigned long old_features;
+ u32 old_features;
if (!dev || !netif_device_present(dev))
return -ENODEV;
diff --git a/net/core/filter.c b/net/core/filter.c
index afc5837..232b187 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -142,14 +142,14 @@
if (err)
return err;
- rcu_read_lock_bh();
- filter = rcu_dereference_bh(sk->sk_filter);
+ rcu_read_lock();
+ filter = rcu_dereference(sk->sk_filter);
if (filter) {
unsigned int pkt_len = sk_run_filter(skb, filter->insns);
err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
}
- rcu_read_unlock_bh();
+ rcu_read_unlock();
return err;
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 60a9029..799f06e 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -316,7 +316,7 @@
{
size_t size = entries * sizeof(struct neighbour *);
struct neigh_hash_table *ret;
- struct neighbour **buckets;
+ struct neighbour __rcu **buckets;
ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
if (!ret)
@@ -324,14 +324,14 @@
if (size <= PAGE_SIZE)
buckets = kzalloc(size, GFP_ATOMIC);
else
- buckets = (struct neighbour **)
+ buckets = (struct neighbour __rcu **)
__get_free_pages(GFP_ATOMIC | __GFP_ZERO,
get_order(size));
if (!buckets) {
kfree(ret);
return NULL;
}
- rcu_assign_pointer(ret->hash_buckets, buckets);
+ ret->hash_buckets = buckets;
ret->hash_mask = entries - 1;
get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
return ret;
@@ -343,7 +343,7 @@
struct neigh_hash_table,
rcu);
size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *);
- struct neighbour **buckets = nht->hash_buckets;
+ struct neighbour __rcu **buckets = nht->hash_buckets;
if (size <= PAGE_SIZE)
kfree(buckets);
@@ -1540,7 +1540,7 @@
panic("cannot create neighbour proc dir entry");
#endif
- tbl->nht = neigh_hash_alloc(8);
+ RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(8));
phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
@@ -1602,7 +1602,8 @@
}
write_unlock(&neigh_tbl_lock);
- call_rcu(&tbl->nht->rcu, neigh_hash_free_rcu);
+ call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
+ neigh_hash_free_rcu);
tbl->nht = NULL;
kfree(tbl->phash_buckets);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index e23c01b..2e4a393 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -99,7 +99,7 @@
NETDEVICE_SHOW(addr_len, fmt_dec);
NETDEVICE_SHOW(iflink, fmt_dec);
NETDEVICE_SHOW(ifindex, fmt_dec);
-NETDEVICE_SHOW(features, fmt_long_hex);
+NETDEVICE_SHOW(features, fmt_hex);
NETDEVICE_SHOW(type, fmt_dec);
NETDEVICE_SHOW(link_mode, fmt_dec);
@@ -295,6 +295,20 @@
return ret;
}
+NETDEVICE_SHOW(group, fmt_dec);
+
+static int change_group(struct net_device *net, unsigned long new_group)
+{
+ dev_set_group(net, (int) new_group);
+ return 0;
+}
+
+static ssize_t store_group(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ return netdev_store(dev, attr, buf, len, change_group);
+}
+
static struct device_attribute net_class_attributes[] = {
__ATTR(addr_assign_type, S_IRUGO, show_addr_assign_type, NULL),
__ATTR(addr_len, S_IRUGO, show_addr_len, NULL),
@@ -316,6 +330,7 @@
__ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags),
__ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len,
store_tx_queue_len),
+ __ATTR(group, S_IRUGO | S_IWUSR, show_group, store_group),
{}
};
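/* Userspace illustration (assumption: the attribute lands at
 * /sys/class/net/<dev>/group, matching the __ATTR() name above):
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/class/net/eth0/group", "w");

	if (!f)
		return 1;
	fprintf(f, "5\n");	/* move eth0 into netdev group 5 */
	return fclose(f) ? 1 : 0;
}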
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index a9e7fc4..d73b77a 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -251,6 +251,7 @@
int max_pkt_size; /* = ETH_ZLEN; */
int pkt_overhead; /* overhead for MPLS, VLANs, IPSEC etc */
int nfrags;
+ struct page *page;
u64 delay; /* nano-seconds */
__u64 count; /* Default No packets to send */
@@ -1134,6 +1135,10 @@
if (node_possible(value)) {
pkt_dev->node = value;
sprintf(pg_result, "OK: node=%d", pkt_dev->node);
+ if (pkt_dev->page) {
+ put_page(pkt_dev->page);
+ pkt_dev->page = NULL;
+ }
}
else
sprintf(pg_result, "ERROR: node not possible");
@@ -2605,6 +2610,90 @@
return htons(id | (cfi << 12) | (prio << 13));
}
+static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
+ int datalen)
+{
+ struct timeval timestamp;
+ struct pktgen_hdr *pgh;
+
+ pgh = (struct pktgen_hdr *)skb_put(skb, sizeof(*pgh));
+ datalen -= sizeof(*pgh);
+
+ if (pkt_dev->nfrags <= 0) {
+ pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
+ memset(pgh + 1, 0, datalen);
+ } else {
+ int frags = pkt_dev->nfrags;
+ int i, len;
+
+
+ if (frags > MAX_SKB_FRAGS)
+ frags = MAX_SKB_FRAGS;
+ len = datalen - frags * PAGE_SIZE;
+ if (len > 0) {
+ memset(skb_put(skb, len), 0, len);
+ datalen = frags * PAGE_SIZE;
+ }
+
+ i = 0;
+ while (datalen > 0) {
+ if (unlikely(!pkt_dev->page)) {
+ int node = numa_node_id();
+
+ if (pkt_dev->node >= 0 && (pkt_dev->flags & F_NODE))
+ node = pkt_dev->node;
+ pkt_dev->page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
+ if (!pkt_dev->page)
+ break;
+ }
+ skb_shinfo(skb)->frags[i].page = pkt_dev->page;
+ get_page(pkt_dev->page);
+ skb_shinfo(skb)->frags[i].page_offset = 0;
+ skb_shinfo(skb)->frags[i].size =
+ (datalen < PAGE_SIZE ? datalen : PAGE_SIZE);
+ datalen -= skb_shinfo(skb)->frags[i].size;
+ skb->len += skb_shinfo(skb)->frags[i].size;
+ skb->data_len += skb_shinfo(skb)->frags[i].size;
+ i++;
+ skb_shinfo(skb)->nr_frags = i;
+ }
+
+ while (i < frags) {
+ int rem;
+
+ if (i == 0)
+ break;
+
+ rem = skb_shinfo(skb)->frags[i - 1].size / 2;
+ if (rem == 0)
+ break;
+
+ skb_shinfo(skb)->frags[i - 1].size -= rem;
+
+ skb_shinfo(skb)->frags[i] =
+ skb_shinfo(skb)->frags[i - 1];
+ get_page(skb_shinfo(skb)->frags[i].page);
+ skb_shinfo(skb)->frags[i].page =
+ skb_shinfo(skb)->frags[i - 1].page;
+ skb_shinfo(skb)->frags[i].page_offset +=
+ skb_shinfo(skb)->frags[i - 1].size;
+ skb_shinfo(skb)->frags[i].size = rem;
+ i++;
+ skb_shinfo(skb)->nr_frags = i;
+ }
+ }
+
+ /* Stamp the time, and sequence number,
+ * convert them to network byte order
+ */
+ pgh->pgh_magic = htonl(PKTGEN_MAGIC);
+ pgh->seq_num = htonl(pkt_dev->seq_num);
+
+ do_gettimeofday(&timestamp);
+ pgh->tv_sec = htonl(timestamp.tv_sec);
+ pgh->tv_usec = htonl(timestamp.tv_usec);
+}
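/* Worked example of the fill/split above (illustration only, 4 KiB
 * pages): nfrags = 3 and datalen = 4096 after the header. The fill
 * loop emits one 4096-byte frag (i = 1); the split loop then halves
 * the tail until all requested frags exist:
 *
 *   pass 1: frag[0] 4096 -> 2048, frag[1] = 2048 @ offset 2048  (i = 2)
 *   pass 2: frag[1] 2048 -> 1024, frag[2] = 1024 @ offset 3072  (i = 3)
 *
 * leaving 2048 + 1024 + 1024 bytes, all referencing the one cached
 * pkt_dev->page at staggered offsets.
 */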
+
static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
struct pktgen_dev *pkt_dev)
{
@@ -2613,7 +2702,6 @@
struct udphdr *udph;
int datalen, iplen;
struct iphdr *iph;
- struct pktgen_hdr *pgh = NULL;
__be16 protocol = htons(ETH_P_IP);
__be32 *mpls;
__be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */
@@ -2729,76 +2817,7 @@
pkt_dev->pkt_overhead);
skb->dev = odev;
skb->pkt_type = PACKET_HOST;
-
- if (pkt_dev->nfrags <= 0) {
- pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
- memset(pgh + 1, 0, datalen - sizeof(struct pktgen_hdr));
- } else {
- int frags = pkt_dev->nfrags;
- int i, len;
-
- pgh = (struct pktgen_hdr *)(((char *)(udph)) + 8);
-
- if (frags > MAX_SKB_FRAGS)
- frags = MAX_SKB_FRAGS;
- if (datalen > frags * PAGE_SIZE) {
- len = datalen - frags * PAGE_SIZE;
- memset(skb_put(skb, len), 0, len);
- datalen = frags * PAGE_SIZE;
- }
-
- i = 0;
- while (datalen > 0) {
- struct page *page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0);
- skb_shinfo(skb)->frags[i].page = page;
- skb_shinfo(skb)->frags[i].page_offset = 0;
- skb_shinfo(skb)->frags[i].size =
- (datalen < PAGE_SIZE ? datalen : PAGE_SIZE);
- datalen -= skb_shinfo(skb)->frags[i].size;
- skb->len += skb_shinfo(skb)->frags[i].size;
- skb->data_len += skb_shinfo(skb)->frags[i].size;
- i++;
- skb_shinfo(skb)->nr_frags = i;
- }
-
- while (i < frags) {
- int rem;
-
- if (i == 0)
- break;
-
- rem = skb_shinfo(skb)->frags[i - 1].size / 2;
- if (rem == 0)
- break;
-
- skb_shinfo(skb)->frags[i - 1].size -= rem;
-
- skb_shinfo(skb)->frags[i] =
- skb_shinfo(skb)->frags[i - 1];
- get_page(skb_shinfo(skb)->frags[i].page);
- skb_shinfo(skb)->frags[i].page =
- skb_shinfo(skb)->frags[i - 1].page;
- skb_shinfo(skb)->frags[i].page_offset +=
- skb_shinfo(skb)->frags[i - 1].size;
- skb_shinfo(skb)->frags[i].size = rem;
- i++;
- skb_shinfo(skb)->nr_frags = i;
- }
- }
-
- /* Stamp the time, and sequence number,
- * convert them to network byte order
- */
- if (pgh) {
- struct timeval timestamp;
-
- pgh->pgh_magic = htonl(PKTGEN_MAGIC);
- pgh->seq_num = htonl(pkt_dev->seq_num);
-
- do_gettimeofday(&timestamp);
- pgh->tv_sec = htonl(timestamp.tv_sec);
- pgh->tv_usec = htonl(timestamp.tv_usec);
- }
+ pktgen_finalize_skb(pkt_dev, skb, datalen);
#ifdef CONFIG_XFRM
if (!process_ipsec(pkt_dev, skb, protocol))
@@ -2980,7 +2999,6 @@
struct udphdr *udph;
int datalen;
struct ipv6hdr *iph;
- struct pktgen_hdr *pgh = NULL;
__be16 protocol = htons(ETH_P_IPV6);
__be32 *mpls;
__be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */
@@ -3083,75 +3101,7 @@
skb->dev = odev;
skb->pkt_type = PACKET_HOST;
- if (pkt_dev->nfrags <= 0)
- pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
- else {
- int frags = pkt_dev->nfrags;
- int i;
-
- pgh = (struct pktgen_hdr *)(((char *)(udph)) + 8);
-
- if (frags > MAX_SKB_FRAGS)
- frags = MAX_SKB_FRAGS;
- if (datalen > frags * PAGE_SIZE) {
- skb_put(skb, datalen - frags * PAGE_SIZE);
- datalen = frags * PAGE_SIZE;
- }
-
- i = 0;
- while (datalen > 0) {
- struct page *page = alloc_pages(GFP_KERNEL, 0);
- skb_shinfo(skb)->frags[i].page = page;
- skb_shinfo(skb)->frags[i].page_offset = 0;
- skb_shinfo(skb)->frags[i].size =
- (datalen < PAGE_SIZE ? datalen : PAGE_SIZE);
- datalen -= skb_shinfo(skb)->frags[i].size;
- skb->len += skb_shinfo(skb)->frags[i].size;
- skb->data_len += skb_shinfo(skb)->frags[i].size;
- i++;
- skb_shinfo(skb)->nr_frags = i;
- }
-
- while (i < frags) {
- int rem;
-
- if (i == 0)
- break;
-
- rem = skb_shinfo(skb)->frags[i - 1].size / 2;
- if (rem == 0)
- break;
-
- skb_shinfo(skb)->frags[i - 1].size -= rem;
-
- skb_shinfo(skb)->frags[i] =
- skb_shinfo(skb)->frags[i - 1];
- get_page(skb_shinfo(skb)->frags[i].page);
- skb_shinfo(skb)->frags[i].page =
- skb_shinfo(skb)->frags[i - 1].page;
- skb_shinfo(skb)->frags[i].page_offset +=
- skb_shinfo(skb)->frags[i - 1].size;
- skb_shinfo(skb)->frags[i].size = rem;
- i++;
- skb_shinfo(skb)->nr_frags = i;
- }
- }
-
- /* Stamp the time, and sequence number,
- * convert them to network byte order
- * should we update cloned packets too ?
- */
- if (pgh) {
- struct timeval timestamp;
-
- pgh->pgh_magic = htonl(PKTGEN_MAGIC);
- pgh->seq_num = htonl(pkt_dev->seq_num);
-
- do_gettimeofday(&timestamp);
- pgh->tv_sec = htonl(timestamp.tv_sec);
- pgh->tv_usec = htonl(timestamp.tv_usec);
- }
- /* pkt_dev->seq_num++; FF: you really mean this? */
+ pktgen_finalize_skb(pkt_dev, skb, datalen);
return skb;
}
@@ -3884,6 +3834,8 @@
free_SAs(pkt_dev);
#endif
vfree(pkt_dev->flows);
+ if (pkt_dev->page)
+ put_page(pkt_dev->page);
kfree(pkt_dev);
return 0;
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 750db57..c668f8c 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -868,6 +868,7 @@
netif_running(dev) ? dev->operstate : IF_OPER_DOWN);
NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode);
NLA_PUT_U32(skb, IFLA_MTU, dev->mtu);
+ NLA_PUT_U32(skb, IFLA_GROUP, dev->group);
if (dev->ifindex != dev->iflink)
NLA_PUT_U32(skb, IFLA_LINK, dev->iflink);
@@ -1265,6 +1266,11 @@
modified = 1;
}
+ if (tb[IFLA_GROUP]) {
+ dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP]));
+ modified = 1;
+ }
+
/*
* Interface selected by interface index but interface
* name provided implies that a name change has been
@@ -1542,6 +1548,8 @@
set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
if (tb[IFLA_LINKMODE])
dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
+ if (tb[IFLA_GROUP])
+ dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP]));
return dev;
@@ -1552,6 +1560,24 @@
}
EXPORT_SYMBOL(rtnl_create_link);
+static int rtnl_group_changelink(struct net *net, int group,
+ struct ifinfomsg *ifm,
+ struct nlattr **tb)
+{
+ struct net_device *dev;
+ int err;
+
+ for_each_netdev(net, dev) {
+ if (dev->group == group) {
+ err = do_setlink(dev, ifm, tb, NULL, 0);
+ if (err < 0)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
struct net *net = sock_net(skb->sk);
@@ -1579,10 +1605,12 @@
ifm = nlmsg_data(nlh);
if (ifm->ifi_index > 0)
dev = __dev_get_by_index(net, ifm->ifi_index);
- else if (ifname[0])
- dev = __dev_get_by_name(net, ifname);
- else
- dev = NULL;
+ else {
+ if (ifname[0])
+ dev = __dev_get_by_name(net, ifname);
+ else
+ dev = NULL;
+ }
err = validate_linkmsg(dev, tb);
if (err < 0)
@@ -1646,8 +1674,13 @@
return do_setlink(dev, ifm, tb, ifname, modified);
}
- if (!(nlh->nlmsg_flags & NLM_F_CREATE))
+ if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
+ if (ifm->ifi_index == 0 && tb[IFLA_GROUP])
+ return rtnl_group_changelink(net,
+ nla_get_u32(tb[IFLA_GROUP]),
+ ifm, tb);
return -ENODEV;
+ }
if (ifm->ifi_index)
return -EOPNOTSUPP;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7cd1bc8..a8b1e3c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2497,7 +2497,7 @@
* a pointer to the first in a list of new skbs for the segments.
* In case of error it returns ERR_PTR(err).
*/
-struct sk_buff *skb_segment(struct sk_buff *skb, int features)
+struct sk_buff *skb_segment(struct sk_buff *skb, u32 features)
{
struct sk_buff *segs = NULL;
struct sk_buff *tail = NULL;
@@ -2507,7 +2507,7 @@
unsigned int offset = doffset;
unsigned int headroom;
unsigned int len;
- int sg = features & NETIF_F_SG;
+ int sg = !!(features & NETIF_F_SG);
int nfrags = skb_shinfo(skb)->nr_frags;
int err = -ENOMEM;
int i = 0;
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index f2abd37..b66600b 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -59,7 +59,6 @@
};
#define dz_key_0(key) ((key).datum = 0)
-#define dz_prefix(key,dz) ((key).datum)
#define for_nexthops(fi) { int nhsel; const struct dn_fib_nh *nh;\
for(nhsel = 0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index a5a1050..8949a05 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -140,6 +140,9 @@
handled by the klogd daemon which is responsible for kernel messages
("man klogd").
+config IP_ROUTE_CLASSID
+ bool
+
config IP_PNP
bool "IP: kernel level autoconfiguration"
help
@@ -657,4 +660,3 @@
on the Internet.
If unsure, say N.
-
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f2b6110..e5e2d9d 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1215,7 +1215,7 @@
return err;
}
-static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
+static struct sk_buff *inet_gso_segment(struct sk_buff *skb, u32 features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
struct iphdr *iph;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 7981a24..9cefe72 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -41,12 +41,12 @@
__be32 srcmask;
__be32 dst;
__be32 dstmask;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
u32 tclassid;
#endif
};
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
u32 fib_rules_tclass(struct fib_result *res)
{
return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0;
@@ -165,7 +165,7 @@
if (frh->dst_len)
rule4->dst = nla_get_be32(tb[FRA_DST]);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
if (tb[FRA_FLOW])
rule4->tclassid = nla_get_u32(tb[FRA_FLOW]);
#endif
@@ -195,7 +195,7 @@
if (frh->tos && (rule4->tos != frh->tos))
return 0;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW])))
return 0;
#endif
@@ -224,7 +224,7 @@
if (rule4->src_len)
NLA_PUT_BE32(skb, FRA_SRC, rule4->src);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
if (rule4->tclassid)
NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid);
#endif
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 12d3dc3..9aff11d7 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -200,7 +200,7 @@
#ifdef CONFIG_IP_ROUTE_MULTIPATH
nh->nh_weight != onh->nh_weight ||
#endif
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
nh->nh_tclassid != onh->nh_tclassid ||
#endif
((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD))
@@ -422,7 +422,7 @@
nla = nla_find(attrs, attrlen, RTA_GATEWAY);
nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
nla = nla_find(attrs, attrlen, RTA_FLOW);
nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
#endif
@@ -476,7 +476,7 @@
nla = nla_find(attrs, attrlen, RTA_GATEWAY);
if (nla && nla_get_be32(nla) != nh->nh_gw)
return 1;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
nla = nla_find(attrs, attrlen, RTA_FLOW);
if (nla && nla_get_u32(nla) != nh->nh_tclassid)
return 1;
@@ -779,7 +779,7 @@
goto err_inval;
if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
goto err_inval;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
goto err_inval;
#endif
@@ -792,7 +792,7 @@
nh->nh_oif = cfg->fc_oif;
nh->nh_gw = cfg->fc_gw;
nh->nh_flags = cfg->fc_flags;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
nh->nh_tclassid = cfg->fc_flow;
#endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -1002,7 +1002,7 @@
if (fi->fib_nh->nh_oif)
NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
if (fi->fib_nh[0].nh_tclassid)
NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
#endif
@@ -1027,7 +1027,7 @@
if (nh->nh_gw)
NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
if (nh->nh_tclassid)
NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
#endif
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index d859bcc..d7b2b09 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -340,7 +340,7 @@
}
}
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
if (unlikely(skb_dst(skb)->tclassid)) {
struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct);
u32 idx = skb_dst(skb)->tclassid;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index babd1a2..f926a31 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -206,8 +206,9 @@
config NF_NAT_SNMP_BASIC
tristate "Basic SNMP-ALG support"
- depends on NF_NAT
+ depends on NF_CONNTRACK_SNMP && NF_NAT
depends on NETFILTER_ADVANCED
+ default NF_NAT && NF_CONNTRACK_SNMP
---help---
This module implements an Application Layer Gateway (ALG) for
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index e855fff..e95054c 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -866,6 +866,7 @@
memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
newinfo->initial_entries = 0;
loc_cpu_entry = info->entries[raw_smp_processor_id()];
+ xt_compat_init_offsets(NFPROTO_ARP, info->number);
xt_entry_foreach(iter, loc_cpu_entry, info->size) {
ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
if (ret != 0)
@@ -1333,6 +1334,7 @@
duprintf("translate_compat_table: size %u\n", info->size);
j = 0;
xt_compat_lock(NFPROTO_ARP);
+ xt_compat_init_offsets(NFPROTO_ARP, number);
/* Walk through entries, checking offsets. */
xt_entry_foreach(iter0, entry0, total_size) {
ret = check_compat_entry_size_and_hooks(iter0, info, &size,
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 652efea..ef7d7b9 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1063,6 +1063,7 @@
memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
newinfo->initial_entries = 0;
loc_cpu_entry = info->entries[raw_smp_processor_id()];
+ xt_compat_init_offsets(AF_INET, info->number);
xt_entry_foreach(iter, loc_cpu_entry, info->size) {
ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
if (ret != 0)
@@ -1664,6 +1665,7 @@
duprintf("translate_compat_table: size %u\n", info->size);
j = 0;
xt_compat_lock(AF_INET);
+ xt_compat_init_offsets(AF_INET, number);
/* Walk through entries, checking offsets. */
xt_entry_foreach(iter0, entry0, total_size) {
ret = check_compat_entry_size_and_hooks(iter0, info, &size,
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 1e26a48..403ca57 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -300,13 +300,8 @@
* that the ->target() function isn't called after ->destroy() */
ct = nf_ct_get(skb, &ctinfo);
- if (ct == NULL) {
- pr_info("no conntrack!\n");
- /* FIXME: need to drop invalid ones, since replies
- * to outgoing connections of other nodes will be
- * marked as INVALID */
+ if (ct == NULL)
return NF_DROP;
- }
/* special case: ICMP error handling. conntrack distinguishes between
* error messages (RELATED) and information requests (see below) */
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 72ffc8f..d76d6c9 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -442,8 +442,7 @@
}
#endif
- /* MAC logging for input path only. */
- if (in && !out)
+ if (in != NULL)
dump_mac_header(m, loginfo, skb);
dump_packet(m, loginfo, skb, 0);
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 294a2a3..aef5d1f 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -60,7 +60,7 @@
ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, NULL, out,
dev_net(out)->ipv4.iptable_mangle);
/* Reroute for ANY change. */
- if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) {
+ if (ret != NF_DROP && ret != NF_STOLEN) {
iph = ip_hdr(skb);
if (iph->saddr != saddr ||
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 63f60fc..5585980 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -20,6 +20,7 @@
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_acct.h>
+#include <linux/rculist_nulls.h>
struct ct_iter_state {
struct seq_net_private p;
@@ -35,7 +36,8 @@
for (st->bucket = 0;
st->bucket < net->ct.htable_size;
st->bucket++) {
- n = rcu_dereference(net->ct.hash[st->bucket].first);
+ n = rcu_dereference(
+ hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
if (!is_a_nulls(n))
return n;
}
@@ -48,13 +50,14 @@
struct net *net = seq_file_net(seq);
struct ct_iter_state *st = seq->private;
- head = rcu_dereference(head->next);
+ head = rcu_dereference(hlist_nulls_next_rcu(head));
while (is_a_nulls(head)) {
if (likely(get_nulls_value(head) == st->bucket)) {
if (++st->bucket >= net->ct.htable_size)
return NULL;
}
- head = rcu_dereference(net->ct.hash[st->bucket].first);
+ head = rcu_dereference(
+ hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
}
return head;
}
@@ -217,7 +220,8 @@
struct hlist_node *n;
for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
- n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+ n = rcu_dereference(
+ hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
if (n)
return n;
}
@@ -230,11 +234,12 @@
struct net *net = seq_file_net(seq);
struct ct_expect_iter_state *st = seq->private;
- head = rcu_dereference(head->next);
+ head = rcu_dereference(hlist_next_rcu(head));
while (head == NULL) {
if (++st->bucket >= nf_ct_expect_hsize)
return NULL;
- head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+ head = rcu_dereference(
+ hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
}
return head;
}
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c
index 0f23b3f..703f366f 100644
--- a/net/ipv4/netfilter/nf_nat_amanda.c
+++ b/net/ipv4/netfilter/nf_nat_amanda.c
@@ -44,13 +44,13 @@
/* Try to get same port: if not, try to change it. */
for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
- int ret;
+ int res;
exp->tuple.dst.u.tcp.port = htons(port);
- ret = nf_ct_expect_related(exp);
- if (ret == 0)
+ res = nf_ct_expect_related(exp);
+ if (res == 0)
break;
- else if (ret != -EBUSY) {
+ else if (res != -EBUSY) {
port = 0;
break;
}
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index c04787c..21bcf47 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -221,7 +221,14 @@
manips not an issue. */
if (maniptype == IP_NAT_MANIP_SRC &&
!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) {
- if (find_appropriate_src(net, zone, orig_tuple, tuple, range)) {
+ /* try the original tuple first */
+ if (in_range(orig_tuple, range)) {
+ if (!nf_nat_used_tuple(orig_tuple, ct)) {
+ *tuple = *orig_tuple;
+ return;
+ }
+ } else if (find_appropriate_src(net, zone, orig_tuple, tuple,
+ range)) {
pr_debug("get_unique_tuple: Found current src map\n");
if (!nf_nat_used_tuple(tuple, ct))
return;
@@ -266,7 +273,6 @@
struct net *net = nf_ct_net(ct);
struct nf_conntrack_tuple curr_tuple, new_tuple;
struct nf_conn_nat *nat;
- int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK);
/* nat helper or nfctnetlink also setup binding */
nat = nfct_nat(ct);
@@ -306,8 +312,7 @@
ct->status |= IPS_DST_NAT;
}
- /* Place in source hash if this is the first time. */
- if (have_to_hash) {
+ if (maniptype == IP_NAT_MANIP_SRC) {
unsigned int srchash;
srchash = hash_by_src(net, nf_ct_zone(ct),
@@ -323,9 +328,9 @@
/* It's done. */
if (maniptype == IP_NAT_MANIP_DST)
- set_bit(IPS_DST_NAT_DONE_BIT, &ct->status);
+ ct->status |= IPS_DST_NAT_DONE;
else
- set_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
+ ct->status |= IPS_SRC_NAT_DONE;
return NF_ACCEPT;
}
@@ -502,7 +507,10 @@
int ret = 0;
spin_lock_bh(&nf_nat_lock);
- if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) {
+ if (rcu_dereference_protected(
+ nf_nat_protos[proto->protonum],
+ lockdep_is_held(&nf_nat_lock)
+ ) != &nf_nat_unknown_protocol) {
ret = -EBUSY;
goto out;
}
@@ -532,7 +540,7 @@
if (nat == NULL || nat->ct == NULL)
return;
- NF_CT_ASSERT(nat->ct->status & IPS_NAT_DONE_MASK);
+ NF_CT_ASSERT(nat->ct->status & IPS_SRC_NAT_DONE);
spin_lock_bh(&nf_nat_lock);
hlist_del_rcu(&nat->bysource);
@@ -545,11 +553,10 @@
struct nf_conn_nat *old_nat = old;
struct nf_conn *ct = old_nat->ct;
- if (!ct || !(ct->status & IPS_NAT_DONE_MASK))
+ if (!ct || !(ct->status & IPS_SRC_NAT_DONE))
return;
spin_lock_bh(&nf_nat_lock);
- new_nat->ct = ct;
hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource);
spin_unlock_bh(&nf_nat_lock);
}
@@ -679,8 +686,7 @@
{
/* Leave them the same for the moment. */
net->ipv4.nat_htable_size = net->ct.htable_size;
- net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size,
- &net->ipv4.nat_vmalloced, 0);
+ net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, 0);
if (!net->ipv4.nat_bysource)
return -ENOMEM;
return 0;
@@ -702,8 +708,7 @@
{
nf_ct_iterate_cleanup(net, &clean_nat, NULL);
synchronize_rcu();
- nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced,
- net->ipv4.nat_htable_size);
+ nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_htable_size);
}
static struct pernet_operations nf_nat_net_ops = {
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index ee5f419..8812a02 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -54,6 +54,7 @@
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_nat_helper.h>
+#include <linux/netfilter/nf_conntrack_snmp.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
@@ -1310,9 +1311,9 @@
{
int ret = 0;
- ret = nf_conntrack_helper_register(&snmp_helper);
- if (ret < 0)
- return ret;
+ BUG_ON(nf_nat_snmp_hook != NULL);
+ rcu_assign_pointer(nf_nat_snmp_hook, help);
+
ret = nf_conntrack_helper_register(&snmp_trap_helper);
if (ret < 0) {
nf_conntrack_helper_unregister(&snmp_helper);
@@ -1323,7 +1324,7 @@
static void __exit nf_nat_snmp_basic_fini(void)
{
- nf_conntrack_helper_unregister(&snmp_helper);
+ rcu_assign_pointer(nf_nat_snmp_hook, NULL);
nf_conntrack_helper_unregister(&snmp_trap_helper);
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 351dc4e..3e5b7cc 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -514,7 +514,7 @@
.release = seq_release,
};
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
static int rt_acct_proc_show(struct seq_file *m, void *v)
{
struct ip_rt_acct *dst, *src;
@@ -567,14 +567,14 @@
if (!pde)
goto err2;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
if (!pde)
goto err3;
#endif
return 0;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
err3:
remove_proc_entry("rt_cache", net->proc_net_stat);
#endif
@@ -588,7 +588,7 @@
{
remove_proc_entry("rt_cache", net->proc_net_stat);
remove_proc_entry("rt_cache", net->proc_net);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
remove_proc_entry("rt_acct", net->proc_net);
#endif
}
@@ -1775,7 +1775,7 @@
memcpy(addr, &src, 4);
}
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
static void set_class_tag(struct rtable *rt, u32 tag)
{
if (!(rt->dst.tclassid & 0xFFFF))
@@ -1825,7 +1825,7 @@
FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
rt->rt_gateway = FIB_RES_GW(*res);
dst_import_metrics(dst, fi->fib_metrics);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
dst->tclassid = FIB_RES_NH(*res).nh_tclassid;
#endif
}
@@ -1835,7 +1835,7 @@
if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40)
dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
#ifdef CONFIG_IP_MULTIPLE_TABLES
set_class_tag(rt, fib_rules_tclass(res));
#endif
@@ -1891,7 +1891,7 @@
rth->fl.mark = skb->mark;
rth->fl.fl4_src = saddr;
rth->rt_src = saddr;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
rth->dst.tclassid = itag;
#endif
rth->rt_iif =
@@ -2208,7 +2208,7 @@
rth->fl.mark = skb->mark;
rth->fl.fl4_src = saddr;
rth->rt_src = saddr;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
rth->dst.tclassid = itag;
#endif
rth->rt_iif =
@@ -2828,7 +2828,7 @@
}
if (rt->dst.dev)
NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
if (rt->dst.tclassid)
NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid);
#endif
@@ -3249,9 +3249,9 @@
};
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
-#endif /* CONFIG_NET_CLS_ROUTE */
+#endif /* CONFIG_IP_ROUTE_CLASSID */
static __initdata unsigned long rhash_entries;
static int __init set_rhash_entries(char *str)
@@ -3267,7 +3267,7 @@
{
int rc = 0;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
if (!ip_rt_acct)
panic("IP: failed to allocate ip_rt_acct\n");
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6c11eec..f9867d2 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2653,7 +2653,7 @@
EXPORT_SYMBOL(compat_tcp_getsockopt);
#endif
-struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
+struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
struct tcphdr *th;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 8157b17..d37baaa 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2199,7 +2199,7 @@
return 0;
}
-struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int features)
+struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, u32 features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
unsigned int mss;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 978e80e..3194aa9 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -772,7 +772,7 @@
return err;
}
-static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features)
+static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, u32 features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
struct ipv6hdr *ipv6h;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 7d227c6..47b7b8d 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1076,6 +1076,7 @@
memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
newinfo->initial_entries = 0;
loc_cpu_entry = info->entries[raw_smp_processor_id()];
+ xt_compat_init_offsets(AF_INET6, info->number);
xt_entry_foreach(iter, loc_cpu_entry, info->size) {
ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
if (ret != 0)
@@ -1679,6 +1680,7 @@
duprintf("translate_compat_table: size %u\n", info->size);
j = 0;
xt_compat_lock(AF_INET6);
+ xt_compat_init_offsets(AF_INET6, number);
/* Walk through entries, checking offsets. */
xt_entry_foreach(iter0, entry0, total_size) {
ret = check_compat_entry_size_and_hooks(iter0, info, &size,
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 09c8889..05027b7 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -452,8 +452,7 @@
in ? in->name : "",
out ? out->name : "");
- /* MAC logging for input path only. */
- if (in && !out)
+ if (in != NULL)
dump_mac_header(m, loginfo, skb);
dump_packet(m, loginfo, skb, skb_network_offset(skb), 1);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 79d43aa..0857272 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -45,6 +45,7 @@
#include <linux/netfilter_ipv6.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
struct nf_ct_frag6_skb_cb
@@ -73,7 +74,7 @@
static struct netns_frags nf_init_frags;
#ifdef CONFIG_SYSCTL
-struct ctl_table nf_ct_frag6_sysctl_table[] = {
+static struct ctl_table nf_ct_frag6_sysctl_table[] = {
{
.procname = "nf_conntrack_frag6_timeout",
.data = &nf_init_frags.timeout,
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 86c3952..2bc6cd7 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -123,18 +123,18 @@
}
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
-static int (*mh_filter)(struct sock *sock, struct sk_buff *skb);
+typedef int mh_filter_t(struct sock *sock, struct sk_buff *skb);
-int rawv6_mh_filter_register(int (*filter)(struct sock *sock,
- struct sk_buff *skb))
+static mh_filter_t __rcu *mh_filter __read_mostly;
+
+int rawv6_mh_filter_register(mh_filter_t filter)
{
rcu_assign_pointer(mh_filter, filter);
return 0;
}
EXPORT_SYMBOL(rawv6_mh_filter_register);
-int rawv6_mh_filter_unregister(int (*filter)(struct sock *sock,
- struct sk_buff *skb))
+int rawv6_mh_filter_unregister(mh_filter_t filter)
{
rcu_assign_pointer(mh_filter, NULL);
synchronize_rcu();
@@ -192,10 +192,10 @@
* policy is placed in rawv6_rcv() because it is
* required for each socket.
*/
- int (*filter)(struct sock *sock, struct sk_buff *skb);
+ mh_filter_t *filter;
filter = rcu_dereference(mh_filter);
- filtered = filter ? filter(sk, skb) : 0;
+ filtered = filter ? (*filter)(sk, skb) : 0;
break;
}
#endif
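The raw.c change is the standard recipe for an RCU-managed function pointer: declare the pointer __rcu, publish with rcu_assign_pointer(), read under rcu_read_lock() with rcu_dereference(), and synchronize_rcu() before the old function may go away. A self-contained sketch of the same pattern (all names are illustrative):

#include <linux/cache.h>
#include <linux/rcupdate.h>

typedef int my_filter_t(int arg);

static my_filter_t __rcu *my_filter __read_mostly;

void my_filter_register(my_filter_t *f)
{
        rcu_assign_pointer(my_filter, f);       /* publish */
}

void my_filter_unregister(void)
{
        rcu_assign_pointer(my_filter, NULL);
        synchronize_rcu();                      /* wait out readers */
}

int my_filter_run(int arg)
{
        my_filter_t *f;
        int ret = 0;

        rcu_read_lock();
        f = rcu_dereference(my_filter);
        if (f)
                ret = (*f)(arg);
        rcu_read_unlock();
        return ret;
}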
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 8ce38f1..b1599a3 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -412,7 +412,7 @@
p = container_of(head, struct ip_tunnel_prl_entry, rcu_head);
do {
- n = p->next;
+ n = rcu_dereference_protected(p->next, 1);
kfree(p);
p = n;
} while (p);
@@ -421,15 +421,17 @@
static int
ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
{
- struct ip_tunnel_prl_entry *x, **p;
+ struct ip_tunnel_prl_entry *x;
+ struct ip_tunnel_prl_entry __rcu **p;
int err = 0;
ASSERT_RTNL();
if (a && a->addr != htonl(INADDR_ANY)) {
- for (p = &t->prl; *p; p = &(*p)->next) {
- if ((*p)->addr == a->addr) {
- x = *p;
+ for (p = &t->prl;
+ (x = rtnl_dereference(*p)) != NULL;
+ p = &x->next) {
+ if (x->addr == a->addr) {
*p = x->next;
call_rcu(&x->rcu_head, prl_entry_destroy_rcu);
t->prl_count--;
@@ -438,9 +440,9 @@
}
err = -ENXIO;
} else {
- if (t->prl) {
+ x = rtnl_dereference(t->prl);
+ if (x) {
t->prl_count = 0;
- x = t->prl;
call_rcu(&x->rcu_head, prl_list_destroy_rcu);
t->prl = NULL;
}
@@ -1179,7 +1181,7 @@
if (!dev->tstats)
return -ENOMEM;
dev_hold(dev);
- sitn->tunnels_wc[0] = tunnel;
+ rcu_assign_pointer(sitn->tunnels_wc[0], tunnel);
return 0;
}
@@ -1196,11 +1198,12 @@
for (prio = 1; prio < 4; prio++) {
int h;
for (h = 0; h < HASH_SIZE; h++) {
- struct ip_tunnel *t = sitn->tunnels[prio][h];
+ struct ip_tunnel *t;
+ t = rtnl_dereference(sitn->tunnels[prio][h]);
while (t != NULL) {
unregister_netdevice_queue(t->dev, head);
- t = t->next;
+ t = rtnl_dereference(t->next);
}
}
}
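sit.c now annotates the PRL next pointers as __rcu and uses rtnl_dereference() where the RTNL lock, not rcu_read_lock(), protects the walk. A hedged sketch of deleting from such a list (struct my_node and its helpers are illustrative):

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>

struct my_node {
        struct my_node __rcu    *next;
        int                     key;
        struct rcu_head         rcu;
};

static void my_node_free_rcu(struct rcu_head *head)
{
        kfree(container_of(head, struct my_node, rcu));
}

/* Writer side: caller holds RTNL; readers walk under rcu_read_lock(). */
static void my_list_del(struct my_node __rcu **head, int key)
{
        struct my_node __rcu **p;
        struct my_node *x;

        ASSERT_RTNL();
        for (p = head; (x = rtnl_dereference(*p)) != NULL; p = &x->next) {
                if (x->key == key) {
                        *p = x->next;   /* unlink; RTNL serializes writers */
                        call_rcu(&x->rcu, my_node_free_rcu);
                        break;
                }
        }
}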
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 9a009c6..a419a787 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1299,7 +1299,7 @@
return 0;
}
-static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, int features)
+static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, u32 features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
unsigned int mss;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 1534f2b..faf7412 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -85,6 +85,17 @@
If unsure, say `N'.
+config NF_CONNTRACK_TIMESTAMP
+ bool 'Connection tracking timestamping'
+ depends on NETFILTER_ADVANCED
+ help
+ This option enables support for connection tracking timestamping.
+ This allows you to store the flow start-time and to obtain
+ the flow-stop time (once it has been destroyed) via Connection
+ tracking events.
+
+ If unsure, say `N'.
+
config NF_CT_PROTO_DCCP
tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)'
depends on EXPERIMENTAL
@@ -185,9 +196,13 @@
To compile it as a module, choose M here. If unsure, say N.
+config NF_CONNTRACK_BROADCAST
+ tristate
+
config NF_CONNTRACK_NETBIOS_NS
tristate "NetBIOS name service protocol support"
depends on NETFILTER_ADVANCED
+ select NF_CONNTRACK_BROADCAST
help
NetBIOS name service requests are sent as broadcast messages from an
unprivileged port and responded to with unicast messages to the
@@ -204,6 +219,21 @@
To compile it as a module, choose M here. If unsure, say N.
+config NF_CONNTRACK_SNMP
+ tristate "SNMP service protocol support"
+ depends on NETFILTER_ADVANCED
+ select NF_CONNTRACK_BROADCAST
+ help
+ SNMP service requests are sent as broadcast messages from an
+ unprivileged port and responded to with unicast messages to the
+ same port. This makes them hard to firewall properly because connection
+ tracking doesn't deal with broadcasts. This helper tracks locally
+ originating SNMP service requests and the corresponding
+ responses. It relies on correct IP address configuration, specifically
+ netmask and broadcast address.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config NF_CONNTRACK_PPTP
tristate "PPtP protocol support"
depends on NETFILTER_ADVANCED
@@ -326,6 +356,16 @@
comment "Xtables targets"
+config NETFILTER_XT_TARGET_AUDIT
+ tristate "AUDIT target support"
+ depends on AUDIT
+ depends on NETFILTER_ADVANCED
+ ---help---
+ This option adds an 'AUDIT' target, which can be used to create
+ audit records for packets dropped/accepted.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config NETFILTER_XT_TARGET_CHECKSUM
tristate "CHECKSUM target support"
depends on IP_NF_MANGLE || IP6_NF_MANGLE
@@ -477,6 +517,7 @@
config NETFILTER_XT_TARGET_NFQUEUE
tristate '"NFQUEUE" target Support'
depends on NETFILTER_ADVANCED
+ select NETFILTER_NETLINK_QUEUE
help
This target replaced the old obsolete QUEUE target.
@@ -886,7 +927,7 @@
config NETFILTER_XT_MATCH_REALM
tristate '"realm" match support'
depends on NETFILTER_ADVANCED
- select NET_CLS_ROUTE
+ select IP_ROUTE_CLASSID
help
This option adds a `realm' match, which allows you to use the realm
key from the routing subsystem inside iptables.
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 441050f..9ae6878 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -1,6 +1,7 @@
netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o
nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o
+nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
obj-$(CONFIG_NETFILTER) = netfilter.o
@@ -28,7 +29,9 @@
obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o
obj-$(CONFIG_NF_CONNTRACK_H323) += nf_conntrack_h323.o
obj-$(CONFIG_NF_CONNTRACK_IRC) += nf_conntrack_irc.o
+obj-$(CONFIG_NF_CONNTRACK_BROADCAST) += nf_conntrack_broadcast.o
obj-$(CONFIG_NF_CONNTRACK_NETBIOS_NS) += nf_conntrack_netbios_ns.o
+obj-$(CONFIG_NF_CONNTRACK_SNMP) += nf_conntrack_snmp.o
obj-$(CONFIG_NF_CONNTRACK_PPTP) += nf_conntrack_pptp.o
obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o
obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o
@@ -45,6 +48,7 @@
obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o
# targets
+obj-$(CONFIG_NETFILTER_XT_TARGET_AUDIT) += xt_AUDIT.o
obj-$(CONFIG_NETFILTER_XT_TARGET_CHECKSUM) += xt_CHECKSUM.o
obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 32fcbe2..1e00bf7 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -175,13 +175,21 @@
ret = 1;
} else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
kfree_skb(skb);
- ret = -(verdict >> NF_VERDICT_BITS);
+ ret = NF_DROP_GETERR(verdict);
if (ret == 0)
ret = -EPERM;
} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
- if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
- verdict >> NF_VERDICT_BITS))
- goto next_hook;
+ ret = nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
+ verdict >> NF_VERDICT_QBITS);
+ if (ret < 0) {
+ if (ret == -ECANCELED)
+ goto next_hook;
+ if (ret == -ESRCH &&
+ (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
+ goto next_hook;
+ kfree_skb(skb);
+ }
+ ret = 0;
}
rcu_read_unlock();
return ret;
@@ -214,7 +222,7 @@
/* This does not belong here, but locally generated errors need it if connection
tracking in use: without this, connection may not be in hash table, and hence
manufactured ICMP or RST packets will not be associated with it. */
-void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
+void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu __read_mostly;
EXPORT_SYMBOL(ip_ct_attach);
void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
@@ -231,7 +239,7 @@
}
EXPORT_SYMBOL(nf_ct_attach);
-void (*nf_ct_destroy)(struct nf_conntrack *);
+void (*nf_ct_destroy)(struct nf_conntrack *) __rcu __read_mostly;
EXPORT_SYMBOL(nf_ct_destroy);
void nf_conntrack_destroy(struct nf_conntrack *nfct)
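nf_hook_slow() now decodes a DROP verdict's errno with NF_DROP_GETERR() and honours a bypass flag on queued packets: -ECANCELED retries the hook, and -ESRCH (no queue listener) only drops the packet unless the verdict carried NF_VERDICT_FLAG_QUEUE_BYPASS. A sketch of how a hook might build such verdicts, assuming the NF_DROP_ERR()/NF_QUEUE_NR() helpers from this series:

#include <linux/errno.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>

/* Illustrative hook: drop with a specific errno, or queue with bypass. */
static unsigned int my_hook(struct sk_buff *skb)
{
        if (skb->len == 0)
                /* NF_DROP_GETERR() in nf_hook_slow() recovers -EHOSTUNREACH */
                return NF_DROP_ERR(-EHOSTUNREACH);

        /* queue 3; fall through to the next hook if nobody listens */
        return NF_QUEUE_NR(3) | NF_VERDICT_FLAG_QUEUE_BYPASS;
}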
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index a475ede..5c48ffb 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -43,11 +43,6 @@
EXPORT_SYMBOL(unregister_ip_vs_app);
EXPORT_SYMBOL(register_ip_vs_app_inc);
-/* ipvs application list head */
-static LIST_HEAD(ip_vs_app_list);
-static DEFINE_MUTEX(__ip_vs_app_mutex);
-
-
/*
* Get an ip_vs_app object
*/
@@ -67,7 +62,8 @@
* Allocate/initialize app incarnation and register it in proto apps.
*/
static int
-ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
+ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
+ __u16 port)
{
struct ip_vs_protocol *pp;
struct ip_vs_app *inc;
@@ -98,7 +94,7 @@
}
}
- ret = pp->register_app(inc);
+ ret = pp->register_app(net, inc);
if (ret)
goto out;
@@ -119,7 +115,7 @@
* Release app incarnation
*/
static void
-ip_vs_app_inc_release(struct ip_vs_app *inc)
+ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc)
{
struct ip_vs_protocol *pp;
@@ -127,7 +123,7 @@
return;
if (pp->unregister_app)
- pp->unregister_app(inc);
+ pp->unregister_app(net, inc);
IP_VS_DBG(9, "%s App %s:%u unregistered\n",
pp->name, inc->name, ntohs(inc->port));
@@ -168,15 +164,17 @@
* Register an application incarnation in protocol applications
*/
int
-register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port)
+register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,
+ __u16 port)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
int result;
- mutex_lock(&__ip_vs_app_mutex);
+ mutex_lock(&ipvs->app_mutex);
- result = ip_vs_app_inc_new(app, proto, port);
+ result = ip_vs_app_inc_new(net, app, proto, port);
- mutex_unlock(&__ip_vs_app_mutex);
+ mutex_unlock(&ipvs->app_mutex);
return result;
}
@@ -185,16 +183,17 @@
/*
* ip_vs_app registration routine
*/
-int register_ip_vs_app(struct ip_vs_app *app)
+int register_ip_vs_app(struct net *net, struct ip_vs_app *app)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
/* increase the module use count */
ip_vs_use_count_inc();
- mutex_lock(&__ip_vs_app_mutex);
+ mutex_lock(&ipvs->app_mutex);
- list_add(&app->a_list, &ip_vs_app_list);
+ list_add(&app->a_list, &ipvs->app_list);
- mutex_unlock(&__ip_vs_app_mutex);
+ mutex_unlock(&ipvs->app_mutex);
return 0;
}
@@ -204,19 +203,20 @@
* ip_vs_app unregistration routine
* We are sure there are no app incarnations attached to services
*/
-void unregister_ip_vs_app(struct ip_vs_app *app)
+void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_app *inc, *nxt;
- mutex_lock(&__ip_vs_app_mutex);
+ mutex_lock(&ipvs->app_mutex);
list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) {
- ip_vs_app_inc_release(inc);
+ ip_vs_app_inc_release(net, inc);
}
list_del(&app->a_list);
- mutex_unlock(&__ip_vs_app_mutex);
+ mutex_unlock(&ipvs->app_mutex);
/* decrease the module use count */
ip_vs_use_count_dec();
@@ -226,7 +226,8 @@
/*
* Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
*/
-int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp)
+int ip_vs_bind_app(struct ip_vs_conn *cp,
+ struct ip_vs_protocol *pp)
{
return pp->app_conn_bind(cp);
}
@@ -481,11 +482,11 @@
* /proc/net/ip_vs_app entry function
*/
-static struct ip_vs_app *ip_vs_app_idx(loff_t pos)
+static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos)
{
struct ip_vs_app *app, *inc;
- list_for_each_entry(app, &ip_vs_app_list, a_list) {
+ list_for_each_entry(app, &ipvs->app_list, a_list) {
list_for_each_entry(inc, &app->incs_list, a_list) {
if (pos-- == 0)
return inc;
@@ -497,19 +498,24 @@
static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
{
- mutex_lock(&__ip_vs_app_mutex);
+ struct net *net = seq_file_net(seq);
+ struct netns_ipvs *ipvs = net_ipvs(net);
- return *pos ? ip_vs_app_idx(*pos - 1) : SEQ_START_TOKEN;
+ mutex_lock(&ipvs->app_mutex);
+
+ return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN;
}
static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct ip_vs_app *inc, *app;
struct list_head *e;
+ struct net *net = seq_file_net(seq);
+ struct netns_ipvs *ipvs = net_ipvs(net);
++*pos;
if (v == SEQ_START_TOKEN)
- return ip_vs_app_idx(0);
+ return ip_vs_app_idx(ipvs, 0);
inc = v;
app = inc->app;
@@ -518,7 +524,7 @@
return list_entry(e, struct ip_vs_app, a_list);
/* go on to next application */
- for (e = app->a_list.next; e != &ip_vs_app_list; e = e->next) {
+ for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) {
app = list_entry(e, struct ip_vs_app, a_list);
list_for_each_entry(inc, &app->incs_list, a_list) {
return inc;
@@ -529,7 +535,9 @@
static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
{
- mutex_unlock(&__ip_vs_app_mutex);
+ struct netns_ipvs *ipvs = net_ipvs(seq_file_net(seq));
+
+ mutex_unlock(&ipvs->app_mutex);
}
static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
@@ -557,7 +565,8 @@
static int ip_vs_app_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &ip_vs_app_seq_ops);
+ return seq_open_net(inode, file, &ip_vs_app_seq_ops,
+ sizeof(struct seq_net_private));
}
static const struct file_operations ip_vs_app_fops = {
@@ -569,15 +578,36 @@
};
#endif
+static int __net_init __ip_vs_app_init(struct net *net)
+{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ INIT_LIST_HEAD(&ipvs->app_list);
+ __mutex_init(&ipvs->app_mutex, "ipvs->app_mutex", &ipvs->app_key);
+ proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops);
+ return 0;
+}
+
+static void __net_exit __ip_vs_app_cleanup(struct net *net)
+{
+ proc_net_remove(net, "ip_vs_app");
+}
+
+static struct pernet_operations ip_vs_app_ops = {
+ .init = __ip_vs_app_init,
+ .exit = __ip_vs_app_cleanup,
+};
+
int __init ip_vs_app_init(void)
{
- /* we will replace it with proc_net_ipvs_create() soon */
- proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops);
- return 0;
+ int rv;
+
+ rv = register_pernet_subsys(&ip_vs_app_ops);
+ return rv;
}
void ip_vs_app_cleanup(void)
{
- proc_net_remove(&init_net, "ip_vs_app");
+ unregister_pernet_subsys(&ip_vs_app_ops);
}
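The ip_vs_app conversion above is the stock pernet pattern: the file-scope app list and mutex move into struct netns_ipvs, the seq_file users switch to seq_open_net()/seq_file_net(), and the /proc entry is created per namespace from an init hook. The skeleton, reduced to its moving parts (my_net_* names are illustrative):

#include <net/net_namespace.h>

static int __net_init my_net_init(struct net *net)
{
        /* allocate and initialize this namespace's private state,
         * register per-net /proc entries, etc. */
        return 0;
}

static void __net_exit my_net_exit(struct net *net)
{
        /* undo my_net_init() for this namespace */
}

static struct pernet_operations my_net_ops = {
        .init = my_net_init,
        .exit = my_net_exit,
};

/* module init:  register_pernet_subsys(&my_net_ops);
 * module exit:  unregister_pernet_subsys(&my_net_ops); */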
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index e9adecd..83233fe 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -48,35 +48,32 @@
/*
* Connection hash size. Default is what was selected at compile time.
*/
-int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
+static int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
module_param_named(conn_tab_bits, ip_vs_conn_tab_bits, int, 0444);
MODULE_PARM_DESC(conn_tab_bits, "Set connections' hash size");
/* size and mask values */
-int ip_vs_conn_tab_size;
-int ip_vs_conn_tab_mask;
+int ip_vs_conn_tab_size __read_mostly;
+static int ip_vs_conn_tab_mask __read_mostly;
/*
* Connection hash table: for input and output packets lookups of IPVS
*/
-static struct list_head *ip_vs_conn_tab;
+static struct list_head *ip_vs_conn_tab __read_mostly;
/* SLAB cache for IPVS connections */
static struct kmem_cache *ip_vs_conn_cachep __read_mostly;
-/* counter for current IPVS connections */
-static atomic_t ip_vs_conn_count = ATOMIC_INIT(0);
-
/* counter for no client port connections */
static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0);
/* random value for IPVS connection hash */
-static unsigned int ip_vs_conn_rnd;
+static unsigned int ip_vs_conn_rnd __read_mostly;
/*
* Fine locking granularity for big connection hash table
*/
-#define CT_LOCKARRAY_BITS 4
+#define CT_LOCKARRAY_BITS 5
#define CT_LOCKARRAY_SIZE (1<<CT_LOCKARRAY_BITS)
#define CT_LOCKARRAY_MASK (CT_LOCKARRAY_SIZE-1)
@@ -133,19 +130,19 @@
/*
* Returns hash value for IPVS connection entry
*/
-static unsigned int ip_vs_conn_hashkey(int af, unsigned proto,
+static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned proto,
const union nf_inet_addr *addr,
__be16 port)
{
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
- return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
- (__force u32)port, proto, ip_vs_conn_rnd)
- & ip_vs_conn_tab_mask;
+ return (jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
+ (__force u32)port, proto, ip_vs_conn_rnd) ^
+ ((size_t)net>>8)) & ip_vs_conn_tab_mask;
#endif
- return jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
- ip_vs_conn_rnd)
- & ip_vs_conn_tab_mask;
+ return (jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
+ ip_vs_conn_rnd) ^
+ ((size_t)net>>8)) & ip_vs_conn_tab_mask;
}
static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
@@ -166,18 +163,18 @@
port = p->vport;
}
- return ip_vs_conn_hashkey(p->af, p->protocol, addr, port);
+ return ip_vs_conn_hashkey(p->net, p->af, p->protocol, addr, port);
}
static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(cp->af, cp->protocol, &cp->caddr, cp->cport,
- NULL, 0, &p);
+ ip_vs_conn_fill_param(ip_vs_conn_net(cp), cp->af, cp->protocol,
+ &cp->caddr, cp->cport, NULL, 0, &p);
- if (cp->dest && cp->dest->svc->pe) {
- p.pe = cp->dest->svc->pe;
+ if (cp->pe) {
+ p.pe = cp->pe;
p.pe_data = cp->pe_data;
p.pe_data_len = cp->pe_data_len;
}
@@ -186,7 +183,7 @@
}
/*
- * Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port.
+ * Hashes ip_vs_conn in ip_vs_conn_tab by netns,proto,addr,port.
* returns bool success.
*/
static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
@@ -269,11 +266,12 @@
list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
if (cp->af == p->af &&
+ p->cport == cp->cport && p->vport == cp->vport &&
ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) &&
ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) &&
- p->cport == cp->cport && p->vport == cp->vport &&
((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
- p->protocol == cp->protocol) {
+ p->protocol == cp->protocol &&
+ ip_vs_conn_net_eq(cp, p->net)) {
/* HIT */
atomic_inc(&cp->refcnt);
ct_read_unlock(hash);
@@ -313,23 +311,23 @@
struct ip_vs_conn_param *p)
{
__be16 _ports[2], *pptr;
+ struct net *net = skb_net(skb);
pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
if (pptr == NULL)
return 1;
if (likely(!inverse))
- ip_vs_conn_fill_param(af, iph->protocol, &iph->saddr, pptr[0],
- &iph->daddr, pptr[1], p);
+ ip_vs_conn_fill_param(net, af, iph->protocol, &iph->saddr,
+ pptr[0], &iph->daddr, pptr[1], p);
else
- ip_vs_conn_fill_param(af, iph->protocol, &iph->daddr, pptr[1],
- &iph->saddr, pptr[0], p);
+ ip_vs_conn_fill_param(net, af, iph->protocol, &iph->daddr,
+ pptr[1], &iph->saddr, pptr[0], p);
return 0;
}
struct ip_vs_conn *
ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
- struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph,
unsigned int proto_off, int inverse)
{
@@ -353,8 +351,10 @@
ct_read_lock(hash);
list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+ if (!ip_vs_conn_net_eq(cp, p->net))
+ continue;
if (p->pe_data && p->pe->ct_match) {
- if (p->pe->ct_match(p, cp))
+ if (p->pe == cp->pe && p->pe->ct_match(p, cp))
goto out;
continue;
}
@@ -404,10 +404,11 @@
list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
if (cp->af == p->af &&
+ p->vport == cp->cport && p->cport == cp->dport &&
ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&
ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) &&
- p->vport == cp->cport && p->cport == cp->dport &&
- p->protocol == cp->protocol) {
+ p->protocol == cp->protocol &&
+ ip_vs_conn_net_eq(cp, p->net)) {
/* HIT */
atomic_inc(&cp->refcnt);
ret = cp;
@@ -428,7 +429,6 @@
struct ip_vs_conn *
ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
- struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph,
unsigned int proto_off, int inverse)
{
@@ -611,9 +611,9 @@
struct ip_vs_dest *dest;
if ((cp) && (!cp->dest)) {
- dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport,
- &cp->vaddr, cp->vport,
- cp->protocol);
+ dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,
+ cp->dport, &cp->vaddr, cp->vport,
+ cp->protocol, cp->fwmark);
ip_vs_bind_dest(cp, dest);
return dest;
} else
@@ -686,13 +686,14 @@
int ip_vs_check_template(struct ip_vs_conn *ct)
{
struct ip_vs_dest *dest = ct->dest;
+ struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(ct));
/*
* Checking the dest server status.
*/
if ((dest == NULL) ||
!(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
- (sysctl_ip_vs_expire_quiescent_template &&
+ (ipvs->sysctl_expire_quiescent_template &&
(atomic_read(&dest->weight) == 0))) {
IP_VS_DBG_BUF(9, "check_template: dest not available for "
"protocol %s s:%s:%d v:%s:%d "
@@ -730,6 +731,7 @@
static void ip_vs_conn_expire(unsigned long data)
{
struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
+ struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
cp->timeout = 60*HZ;
@@ -765,13 +767,14 @@
if (cp->flags & IP_VS_CONN_F_NFCT)
ip_vs_conn_drop_conntrack(cp);
+ ip_vs_pe_put(cp->pe);
kfree(cp->pe_data);
if (unlikely(cp->app != NULL))
ip_vs_unbind_app(cp);
ip_vs_unbind_dest(cp);
if (cp->flags & IP_VS_CONN_F_NO_CPORT)
atomic_dec(&ip_vs_conn_no_cport_cnt);
- atomic_dec(&ip_vs_conn_count);
+ atomic_dec(&ipvs->conn_count);
kmem_cache_free(ip_vs_conn_cachep, cp);
return;
@@ -802,10 +805,12 @@
struct ip_vs_conn *
ip_vs_conn_new(const struct ip_vs_conn_param *p,
const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
- struct ip_vs_dest *dest)
+ struct ip_vs_dest *dest, __u32 fwmark)
{
struct ip_vs_conn *cp;
- struct ip_vs_protocol *pp = ip_vs_proto_get(p->protocol);
+ struct netns_ipvs *ipvs = net_ipvs(p->net);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net,
+ p->protocol);
cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);
if (cp == NULL) {
@@ -815,6 +820,7 @@
INIT_LIST_HEAD(&cp->c_list);
setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
+ ip_vs_conn_net_set(cp, p->net);
cp->af = p->af;
cp->protocol = p->protocol;
ip_vs_addr_copy(p->af, &cp->caddr, p->caddr);
@@ -826,7 +832,10 @@
&cp->daddr, daddr);
cp->dport = dport;
cp->flags = flags;
- if (flags & IP_VS_CONN_F_TEMPLATE && p->pe_data) {
+ cp->fwmark = fwmark;
+ if (flags & IP_VS_CONN_F_TEMPLATE && p->pe) {
+ ip_vs_pe_get(p->pe);
+ cp->pe = p->pe;
cp->pe_data = p->pe_data;
cp->pe_data_len = p->pe_data_len;
}
@@ -842,7 +851,7 @@
atomic_set(&cp->n_control, 0);
atomic_set(&cp->in_pkts, 0);
- atomic_inc(&ip_vs_conn_count);
+ atomic_inc(&ipvs->conn_count);
if (flags & IP_VS_CONN_F_NO_CPORT)
atomic_inc(&ip_vs_conn_no_cport_cnt);
@@ -861,8 +870,8 @@
#endif
ip_vs_bind_xmit(cp);
- if (unlikely(pp && atomic_read(&pp->appcnt)))
- ip_vs_bind_app(cp, pp);
+ if (unlikely(pd && atomic_read(&pd->appcnt)))
+ ip_vs_bind_app(cp, pd->pp);
/*
* Allow conntrack to be preserved. By default, conntrack
@@ -871,7 +880,7 @@
* IP_VS_CONN_F_ONE_PACKET too.
*/
- if (ip_vs_conntrack_enabled())
+ if (ip_vs_conntrack_enabled(ipvs))
cp->flags |= IP_VS_CONN_F_NFCT;
/* Hash it in the ip_vs_conn_tab finally */
@@ -884,17 +893,22 @@
* /proc/net/ip_vs_conn entries
*/
#ifdef CONFIG_PROC_FS
+struct ip_vs_iter_state {
+ struct seq_net_private p;
+ struct list_head *l;
+};
static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
{
int idx;
struct ip_vs_conn *cp;
+ struct ip_vs_iter_state *iter = seq->private;
for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
ct_read_lock_bh(idx);
list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
if (pos-- == 0) {
- seq->private = &ip_vs_conn_tab[idx];
+ iter->l = &ip_vs_conn_tab[idx];
return cp;
}
}
@@ -906,14 +920,17 @@
static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos)
{
- seq->private = NULL;
+ struct ip_vs_iter_state *iter = seq->private;
+
+ iter->l = NULL;
return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN;
}
static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct ip_vs_conn *cp = v;
- struct list_head *e, *l = seq->private;
+ struct ip_vs_iter_state *iter = seq->private;
+ struct list_head *e, *l = iter->l;
int idx;
++*pos;
@@ -930,18 +947,19 @@
while (++idx < ip_vs_conn_tab_size) {
ct_read_lock_bh(idx);
list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
- seq->private = &ip_vs_conn_tab[idx];
+ iter->l = &ip_vs_conn_tab[idx];
return cp;
}
ct_read_unlock_bh(idx);
}
- seq->private = NULL;
+ iter->l = NULL;
return NULL;
}
static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v)
{
- struct list_head *l = seq->private;
+ struct ip_vs_iter_state *iter = seq->private;
+ struct list_head *l = iter->l;
if (l)
ct_read_unlock_bh(l - ip_vs_conn_tab);
@@ -955,18 +973,19 @@
"Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires PEName PEData\n");
else {
const struct ip_vs_conn *cp = v;
+ struct net *net = seq_file_net(seq);
char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3];
size_t len = 0;
- if (cp->dest && cp->pe_data &&
- cp->dest->svc->pe->show_pe_data) {
+ if (!ip_vs_conn_net_eq(cp, net))
+ return 0;
+ if (cp->pe_data) {
pe_data[0] = ' ';
- len = strlen(cp->dest->svc->pe->name);
- memcpy(pe_data + 1, cp->dest->svc->pe->name, len);
+ len = strlen(cp->pe->name);
+ memcpy(pe_data + 1, cp->pe->name, len);
pe_data[len + 1] = ' ';
len += 2;
- len += cp->dest->svc->pe->show_pe_data(cp,
- pe_data + len);
+ len += cp->pe->show_pe_data(cp, pe_data + len);
}
pe_data[len] = '\0';
@@ -1004,7 +1023,8 @@
static int ip_vs_conn_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &ip_vs_conn_seq_ops);
+ return seq_open_net(inode, file, &ip_vs_conn_seq_ops,
+ sizeof(struct ip_vs_iter_state));
}
static const struct file_operations ip_vs_conn_fops = {
@@ -1031,6 +1051,10 @@
"Pro FromIP FPrt ToIP TPrt DestIP DPrt State Origin Expires\n");
else {
const struct ip_vs_conn *cp = v;
+ struct net *net = seq_file_net(seq);
+
+ if (!ip_vs_conn_net_eq(cp, net))
+ return 0;
#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
@@ -1067,7 +1091,8 @@
static int ip_vs_conn_sync_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &ip_vs_conn_sync_seq_ops);
+ return seq_open_net(inode, file, &ip_vs_conn_sync_seq_ops,
+ sizeof(struct ip_vs_iter_state));
}
static const struct file_operations ip_vs_conn_sync_fops = {
@@ -1113,7 +1138,7 @@
}
/* Called from keventd and must protect itself from softirqs */
-void ip_vs_random_dropentry(void)
+void ip_vs_random_dropentry(struct net *net)
{
int idx;
struct ip_vs_conn *cp;
@@ -1133,7 +1158,8 @@
if (cp->flags & IP_VS_CONN_F_TEMPLATE)
/* connection template */
continue;
-
+ if (!ip_vs_conn_net_eq(cp, net))
+ continue;
if (cp->protocol == IPPROTO_TCP) {
switch(cp->state) {
case IP_VS_TCP_S_SYN_RECV:
@@ -1168,12 +1194,13 @@
/*
* Flush all the connection entries in the ip_vs_conn_tab
*/
-static void ip_vs_conn_flush(void)
+static void ip_vs_conn_flush(struct net *net)
{
int idx;
struct ip_vs_conn *cp;
+ struct netns_ipvs *ipvs = net_ipvs(net);
- flush_again:
+flush_again:
for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
/*
* Lock is actually needed in this loop.
@@ -1181,7 +1208,8 @@
ct_write_lock_bh(idx);
list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
-
+ if (!ip_vs_conn_net_eq(cp, net))
+ continue;
IP_VS_DBG(4, "del connection\n");
ip_vs_conn_expire_now(cp);
if (cp->control) {
@@ -1194,16 +1222,41 @@
/* the counter may be not NULL, because maybe some conn entries
are run by slow timer handler or unhashed but still referred */
- if (atomic_read(&ip_vs_conn_count) != 0) {
+ if (atomic_read(&ipvs->conn_count) != 0) {
schedule();
goto flush_again;
}
}
+/*
+ * per netns init and exit
+ */
+int __net_init __ip_vs_conn_init(struct net *net)
+{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ atomic_set(&ipvs->conn_count, 0);
+
+ proc_net_fops_create(net, "ip_vs_conn", 0, &ip_vs_conn_fops);
+ proc_net_fops_create(net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
+ return 0;
+}
+
+static void __net_exit __ip_vs_conn_cleanup(struct net *net)
+{
+ /* flush all the connection entries first */
+ ip_vs_conn_flush(net);
+ proc_net_remove(net, "ip_vs_conn");
+ proc_net_remove(net, "ip_vs_conn_sync");
+}
+static struct pernet_operations ipvs_conn_ops = {
+ .init = __ip_vs_conn_init,
+ .exit = __ip_vs_conn_cleanup,
+};
int __init ip_vs_conn_init(void)
{
int idx;
+ int retc;
/* Compute size and mask */
ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits;
@@ -1241,24 +1294,18 @@
rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);
}
- proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops);
- proc_net_fops_create(&init_net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
+ retc = register_pernet_subsys(&ipvs_conn_ops);
/* calculate the random value for connection hash */
get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
- return 0;
+ return retc;
}
-
void ip_vs_conn_cleanup(void)
{
- /* flush all the connection entries first */
- ip_vs_conn_flush();
-
+ unregister_pernet_subsys(&ipvs_conn_ops);
/* Release the empty cache */
kmem_cache_destroy(ip_vs_conn_cachep);
- proc_net_remove(&init_net, "ip_vs_conn");
- proc_net_remove(&init_net, "ip_vs_conn_sync");
vfree(ip_vs_conn_tab);
}
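Two themes run through the ip_vs_conn.c conversion: the table state becomes namespace-aware (conn_count moves into netns_ipvs, every lookup also checks ip_vs_conn_net_eq()), and the hash key mixes in the struct net pointer, shifted right 8 bits to discard the allocator-aligned low bits, so identical 5-tuples from different namespaces spread across buckets. A sketch of the mixing step (names and the mask are stand-ins):

#include <linux/jhash.h>
#include <linux/random.h>
#include <net/net_namespace.h>

static u32 my_hash_rnd;  /* seed once: get_random_bytes(&my_hash_rnd, 4) */

static unsigned int my_conn_hash(const struct net *net, __be32 addr,
                                 __be16 port, u16 proto, unsigned int mask)
{
        return (jhash_3words((__force u32)addr, (__force u32)port, proto,
                             my_hash_rnd) ^
                ((size_t)net >> 8)) & mask;  /* fold in namespace identity */
}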
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index b4e51e9..f36a84f 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -41,6 +41,7 @@
#include <net/icmp.h> /* for icmp_send */
#include <net/route.h>
#include <net/ip6_checksum.h>
+#include <net/netns/generic.h> /* net_generic() */
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
@@ -68,6 +69,12 @@
EXPORT_SYMBOL(ip_vs_get_debug_level);
#endif
+int ip_vs_net_id __read_mostly;
+#ifdef IP_VS_GENERIC_NETNS
+EXPORT_SYMBOL(ip_vs_net_id);
+#endif
+/* netns cnt used for uniqueness */
+static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0);
/* ID used in ICMP lookups */
#define icmp_id(icmph) (((icmph)->un).echo.id)
@@ -108,21 +115,28 @@
ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
{
struct ip_vs_dest *dest = cp->dest;
+ struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+
if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
- spin_lock(&dest->stats.lock);
- dest->stats.ustats.inpkts++;
- dest->stats.ustats.inbytes += skb->len;
- spin_unlock(&dest->stats.lock);
+ struct ip_vs_cpu_stats *s;
- spin_lock(&dest->svc->stats.lock);
- dest->svc->stats.ustats.inpkts++;
- dest->svc->stats.ustats.inbytes += skb->len;
- spin_unlock(&dest->svc->stats.lock);
+ s = this_cpu_ptr(dest->stats.cpustats);
+ s->ustats.inpkts++;
+ u64_stats_update_begin(&s->syncp);
+ s->ustats.inbytes += skb->len;
+ u64_stats_update_end(&s->syncp);
- spin_lock(&ip_vs_stats.lock);
- ip_vs_stats.ustats.inpkts++;
- ip_vs_stats.ustats.inbytes += skb->len;
- spin_unlock(&ip_vs_stats.lock);
+ s = this_cpu_ptr(dest->svc->stats.cpustats);
+ s->ustats.inpkts++;
+ u64_stats_update_begin(&s->syncp);
+ s->ustats.inbytes += skb->len;
+ u64_stats_update_end(&s->syncp);
+
+ s = this_cpu_ptr(ipvs->cpustats);
+ s->ustats.inpkts++;
+ u64_stats_update_begin(&s->syncp);
+ s->ustats.inbytes += skb->len;
+ u64_stats_update_end(&s->syncp);
}
}
@@ -131,21 +145,28 @@
ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
{
struct ip_vs_dest *dest = cp->dest;
+ struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+
if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
- spin_lock(&dest->stats.lock);
- dest->stats.ustats.outpkts++;
- dest->stats.ustats.outbytes += skb->len;
- spin_unlock(&dest->stats.lock);
+ struct ip_vs_cpu_stats *s;
- spin_lock(&dest->svc->stats.lock);
- dest->svc->stats.ustats.outpkts++;
- dest->svc->stats.ustats.outbytes += skb->len;
- spin_unlock(&dest->svc->stats.lock);
+ s = this_cpu_ptr(dest->stats.cpustats);
+ s->ustats.outpkts++;
+ u64_stats_update_begin(&s->syncp);
+ s->ustats.outbytes += skb->len;
+ u64_stats_update_end(&s->syncp);
- spin_lock(&ip_vs_stats.lock);
- ip_vs_stats.ustats.outpkts++;
- ip_vs_stats.ustats.outbytes += skb->len;
- spin_unlock(&ip_vs_stats.lock);
+ s = this_cpu_ptr(dest->svc->stats.cpustats);
+ s->ustats.outpkts++;
+ u64_stats_update_begin(&s->syncp);
+ s->ustats.outbytes += skb->len;
+ u64_stats_update_end(&s->syncp);
+
+ s = this_cpu_ptr(ipvs->cpustats);
+ s->ustats.outpkts++;
+ u64_stats_update_begin(&s->syncp);
+ s->ustats.outbytes += skb->len;
+ u64_stats_update_end(&s->syncp);
}
}
@@ -153,41 +174,44 @@
static inline void
ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
{
- spin_lock(&cp->dest->stats.lock);
- cp->dest->stats.ustats.conns++;
- spin_unlock(&cp->dest->stats.lock);
+ struct netns_ipvs *ipvs = net_ipvs(svc->net);
+ struct ip_vs_cpu_stats *s;
- spin_lock(&svc->stats.lock);
- svc->stats.ustats.conns++;
- spin_unlock(&svc->stats.lock);
+ s = this_cpu_ptr(cp->dest->stats.cpustats);
+ s->ustats.conns++;
- spin_lock(&ip_vs_stats.lock);
- ip_vs_stats.ustats.conns++;
- spin_unlock(&ip_vs_stats.lock);
+ s = this_cpu_ptr(svc->stats.cpustats);
+ s->ustats.conns++;
+
+ s = this_cpu_ptr(ipvs->cpustats);
+ s->ustats.conns++;
}
static inline int
ip_vs_set_state(struct ip_vs_conn *cp, int direction,
const struct sk_buff *skb,
- struct ip_vs_protocol *pp)
+ struct ip_vs_proto_data *pd)
{
- if (unlikely(!pp->state_transition))
+ if (unlikely(!pd->pp->state_transition))
return 0;
- return pp->state_transition(cp, direction, skb, pp);
+ return pd->pp->state_transition(cp, direction, skb, pd);
}
-static inline void
+static inline int
ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
struct sk_buff *skb, int protocol,
const union nf_inet_addr *caddr, __be16 cport,
const union nf_inet_addr *vaddr, __be16 vport,
struct ip_vs_conn_param *p)
{
- ip_vs_conn_fill_param(svc->af, protocol, caddr, cport, vaddr, vport, p);
+ ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr,
+ vport, p);
p->pe = svc->pe;
if (p->pe && p->pe->fill_param)
- p->pe->fill_param(p, skb);
+ return p->pe->fill_param(p, skb);
+
+ return 0;
}
/*
@@ -200,7 +224,7 @@
static struct ip_vs_conn *
ip_vs_sched_persist(struct ip_vs_service *svc,
struct sk_buff *skb,
- __be16 ports[2])
+ __be16 src_port, __be16 dst_port, int *ignored)
{
struct ip_vs_conn *cp = NULL;
struct ip_vs_iphdr iph;
@@ -224,8 +248,8 @@
IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
"mnet %s\n",
- IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(ports[0]),
- IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(ports[1]),
+ IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(src_port),
+ IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(dst_port),
IP_VS_DBG_ADDR(svc->af, &snet));
/*
@@ -247,14 +271,14 @@
const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) };
__be16 vport = 0;
- if (ports[1] == svc->port) {
+ if (dst_port == svc->port) {
/* non-FTP template:
* <protocol, caddr, 0, vaddr, vport, daddr, dport>
* FTP template:
* <protocol, caddr, 0, vaddr, 0, daddr, 0>
*/
if (svc->port != FTPPORT)
- vport = ports[1];
+ vport = dst_port;
} else {
/* Note: persistent fwmark-based services and
* persistent port zero service are handled here.
@@ -268,24 +292,31 @@
vaddr = &fwmark;
}
}
- ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,
- vaddr, vport, ¶m);
+ /* return *ignored = -1 so NF_DROP can be used */
+ if (ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,
+ vaddr, vport, ¶m) < 0) {
+ *ignored = -1;
+ return NULL;
+ }
}
/* Check if a template already exists */
ct = ip_vs_ct_in_get(¶m);
if (!ct || !ip_vs_check_template(ct)) {
- /* No template found or the dest of the connection
+ /*
+ * No template found or the dest of the connection
* template is not available.
+ * return *ignored=0 i.e. ICMP and NF_DROP
*/
dest = svc->scheduler->schedule(svc, skb);
if (!dest) {
IP_VS_DBG(1, "p-schedule: no dest found.\n");
kfree(param.pe_data);
+ *ignored = 0;
return NULL;
}
- if (ports[1] == svc->port && svc->port != FTPPORT)
+ if (dst_port == svc->port && svc->port != FTPPORT)
dport = dest->port;
/* Create a template
@@ -293,9 +324,10 @@
* and thus param.pe_data will be destroyed
* when the template expires */
ct = ip_vs_conn_new(¶m, &dest->addr, dport,
- IP_VS_CONN_F_TEMPLATE, dest);
+ IP_VS_CONN_F_TEMPLATE, dest, skb->mark);
if (ct == NULL) {
kfree(param.pe_data);
+ *ignored = -1;
return NULL;
}
@@ -306,7 +338,7 @@
kfree(param.pe_data);
}
- dport = ports[1];
+ dport = dst_port;
if (dport == svc->port && dest->port)
dport = dest->port;
@@ -317,11 +349,13 @@
/*
* Create a new connection according to the template
*/
- ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, ports[0],
- &iph.daddr, ports[1], ¶m);
- cp = ip_vs_conn_new(¶m, &dest->addr, dport, flags, dest);
+ ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr,
+ src_port, &iph.daddr, dst_port, ¶m);
+
+ cp = ip_vs_conn_new(¶m, &dest->addr, dport, flags, dest, skb->mark);
if (cp == NULL) {
ip_vs_conn_put(ct);
+ *ignored = -1;
return NULL;
}
@@ -341,11 +375,27 @@
* It selects a server according to the virtual service, and
* creates a connection entry.
* Protocols supported: TCP, UDP
+ *
+ * Usage of *ignored
+ *
+ * 1 : protocol tried to schedule (e.g. on SYN), found svc but the
+ * svc/scheduler decides that this packet should be accepted with
+ * NF_ACCEPT because it must not be scheduled.
+ *
+ * 0 : scheduler cannot find a destination, so try bypass or
+ * return ICMP and then NF_DROP (ip_vs_leave).
+ *
+ * -1 : scheduler tried to schedule but a fatal error occurred, e.g.
+ * ip_vs_conn_new failure (ENOMEM) or ip_vs_sip_fill_param
+ * failure such as missing Call-ID, ENOMEM on skb_linearize
+ * or pe_data. In this case we should return NF_DROP without
+ * any attempts to send ICMP with ip_vs_leave.
*/
struct ip_vs_conn *
ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
- struct ip_vs_protocol *pp, int *ignored)
+ struct ip_vs_proto_data *pd, int *ignored)
{
+ struct ip_vs_protocol *pp = pd->pp;
struct ip_vs_conn *cp = NULL;
struct ip_vs_iphdr iph;
struct ip_vs_dest *dest;
@@ -371,12 +421,10 @@
}
/*
- * Do not schedule replies from local real server. It is risky
- * for fwmark services but mostly for persistent services.
+ * Do not schedule replies from local real server.
*/
if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
- (svc->flags & IP_VS_SVC_F_PERSISTENT || svc->fwmark) &&
- (cp = pp->conn_in_get(svc->af, skb, pp, &iph, iph.len, 1))) {
+ (cp = pp->conn_in_get(svc->af, skb, &iph, iph.len, 1))) {
IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
"Not scheduling reply for existing connection");
__ip_vs_conn_put(cp);
@@ -386,10 +434,10 @@
/*
* Persistent service
*/
- if (svc->flags & IP_VS_SVC_F_PERSISTENT) {
- *ignored = 0;
- return ip_vs_sched_persist(svc, skb, pptr);
- }
+ if (svc->flags & IP_VS_SVC_F_PERSISTENT)
+ return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored);
+
+ *ignored = 0;
/*
* Non-persistent service
@@ -402,8 +450,6 @@
return NULL;
}
- *ignored = 0;
-
dest = svc->scheduler->schedule(svc, skb);
if (dest == NULL) {
IP_VS_DBG(1, "Schedule: no dest found.\n");
@@ -419,13 +465,17 @@
*/
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr,
- pptr[0], &iph.daddr, pptr[1], &p);
+
+ ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol,
+ &iph.saddr, pptr[0], &iph.daddr, pptr[1],
+ &p);
cp = ip_vs_conn_new(&p, &dest->addr,
dest->port ? dest->port : pptr[1],
- flags, dest);
- if (!cp)
+ flags, dest, skb->mark);
+ if (!cp) {
+ *ignored = -1;
return NULL;
+ }
}
IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
@@ -447,11 +497,14 @@
* no destination is available for a new connection.
*/
int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
- struct ip_vs_protocol *pp)
+ struct ip_vs_proto_data *pd)
{
+ struct net *net;
+ struct netns_ipvs *ipvs;
__be16 _ports[2], *pptr;
struct ip_vs_iphdr iph;
int unicast;
+
ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
@@ -459,18 +512,20 @@
ip_vs_service_put(svc);
return NF_DROP;
}
+ net = skb_net(skb);
#ifdef CONFIG_IP_VS_IPV6
if (svc->af == AF_INET6)
unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST;
else
#endif
- unicast = (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST);
+ unicast = (inet_addr_type(net, iph.daddr.ip) == RTN_UNICAST);
/* if it is fwmark-based service, the cache_bypass sysctl is up
and the destination is a non-local unicast, then create
a cache_bypass connection entry */
- if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) {
+ ipvs = net_ipvs(net);
+ if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) {
int ret, cs;
struct ip_vs_conn *cp;
unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
@@ -484,12 +539,12 @@
IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(svc->af, iph.protocol,
+ ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol,
&iph.saddr, pptr[0],
&iph.daddr, pptr[1], &p);
cp = ip_vs_conn_new(&p, &daddr, 0,
IP_VS_CONN_F_BYPASS | flags,
- NULL);
+ NULL, skb->mark);
if (!cp)
return NF_DROP;
}
@@ -498,10 +553,10 @@
ip_vs_in_stats(cp, skb);
/* set state */
- cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);
+ cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
/* transmit the first SYN packet */
- ret = cp->packet_xmit(skb, cp, pp);
+ ret = cp->packet_xmit(skb, cp, pd->pp);
/* do not touch skb anymore */
atomic_inc(&cp->in_pkts);
@@ -682,6 +737,7 @@
struct ip_vs_protocol *pp,
unsigned int offset, unsigned int ihl)
{
+ struct netns_ipvs *ipvs;
unsigned int verdict = NF_DROP;
if (IP_VS_FWD_METHOD(cp) != 0) {
@@ -703,6 +759,8 @@
if (!skb_make_writable(skb, offset))
goto out;
+ ipvs = net_ipvs(skb_net(skb));
+
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
ip_vs_nat_icmp_v6(skb, pp, cp, 1);
@@ -712,11 +770,11 @@
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
- if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
+ if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0)
goto out;
} else
#endif
- if ((sysctl_ip_vs_snat_reroute ||
+ if ((ipvs->sysctl_snat_reroute ||
skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
ip_route_me_harder(skb, RTN_LOCAL) != 0)
goto out;
@@ -808,7 +866,7 @@
ip_vs_fill_iphdr(AF_INET, cih, &ciph);
/* The embedded headers contain source and dest in reverse order */
- cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
+ cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1);
if (!cp)
return NF_ACCEPT;
@@ -885,7 +943,7 @@
ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
/* The embedded headers contain source and dest in reverse order */
- cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
+ cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1);
if (!cp)
return NF_ACCEPT;
@@ -924,9 +982,12 @@
* Used for NAT and local client.
*/
static unsigned int
-handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
struct ip_vs_conn *cp, int ihl)
{
+ struct ip_vs_protocol *pp = pd->pp;
+ struct netns_ipvs *ipvs;
+
IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
if (!skb_make_writable(skb, ihl))
@@ -961,13 +1022,15 @@
* if it came from this machine itself. So re-compute
* the routing information.
*/
+ ipvs = net_ipvs(skb_net(skb));
+
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
- if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
+ if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0)
goto drop;
} else
#endif
- if ((sysctl_ip_vs_snat_reroute ||
+ if ((ipvs->sysctl_snat_reroute ||
skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
ip_route_me_harder(skb, RTN_LOCAL) != 0)
goto drop;
@@ -975,7 +1038,7 @@
IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT");
ip_vs_out_stats(cp, skb);
- ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
+ ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd);
skb->ipvs_property = 1;
if (!(cp->flags & IP_VS_CONN_F_NFCT))
ip_vs_notrack(skb);
@@ -999,9 +1062,12 @@
static unsigned int
ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
{
+ struct net *net = NULL;
struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp;
+ struct ip_vs_proto_data *pd;
struct ip_vs_conn *cp;
+ struct netns_ipvs *ipvs;
EnterFunction(11);
@@ -1022,6 +1088,7 @@
if (unlikely(!skb_dst(skb)))
return NF_ACCEPT;
+ net = skb_net(skb);
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
@@ -1045,9 +1112,10 @@
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
}
- pp = ip_vs_proto_get(iph.protocol);
- if (unlikely(!pp))
+ pd = ip_vs_proto_data_get(net, iph.protocol);
+ if (unlikely(!pd))
return NF_ACCEPT;
+ pp = pd->pp;
/* reassemble IP fragments */
#ifdef CONFIG_IP_VS_IPV6
@@ -1073,11 +1141,12 @@
/*
* Check if the packet belongs to an existing entry
*/
- cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
+ cp = pp->conn_out_get(af, skb, &iph, iph.len, 0);
+ ipvs = net_ipvs(net);
if (likely(cp))
- return handle_response(af, skb, pp, cp, iph.len);
- if (sysctl_ip_vs_nat_icmp_send &&
+ return handle_response(af, skb, pd, cp, iph.len);
+ if (ipvs->sysctl_nat_icmp_send &&
(pp->protocol == IPPROTO_TCP ||
pp->protocol == IPPROTO_UDP ||
pp->protocol == IPPROTO_SCTP)) {
@@ -1087,7 +1156,7 @@
sizeof(_ports), _ports);
if (pptr == NULL)
return NF_ACCEPT; /* Not for me */
- if (ip_vs_lookup_real_service(af, iph.protocol,
+ if (ip_vs_lookup_real_service(net, af, iph.protocol,
&iph.saddr,
pptr[0])) {
/*
@@ -1202,12 +1271,14 @@
static int
ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
{
+ struct net *net = NULL;
struct iphdr *iph;
struct icmphdr _icmph, *ic;
struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */
struct ip_vs_iphdr ciph;
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
+ struct ip_vs_proto_data *pd;
unsigned int offset, ihl, verdict;
union nf_inet_addr snet;
@@ -1249,9 +1320,11 @@
if (cih == NULL)
return NF_ACCEPT; /* The packet looks wrong, ignore */
- pp = ip_vs_proto_get(cih->protocol);
- if (!pp)
+ net = skb_net(skb);
+ pd = ip_vs_proto_data_get(net, cih->protocol);
+ if (!pd)
return NF_ACCEPT;
+ pp = pd->pp;
/* Is the embedded protocol header present? */
if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
@@ -1265,10 +1338,10 @@
ip_vs_fill_iphdr(AF_INET, cih, &ciph);
/* The embedded headers contain source and dest in reverse order */
- cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1);
+ cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, 1);
if (!cp) {
/* The packet could also belong to a local client */
- cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
+ cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1);
if (cp) {
snet.ip = iph->saddr;
return handle_response_icmp(AF_INET, skb, &snet,
@@ -1312,6 +1385,7 @@
static int
ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
{
+ struct net *net = NULL;
struct ipv6hdr *iph;
struct icmp6hdr _icmph, *ic;
struct ipv6hdr _ciph, *cih; /* The ip header contained
@@ -1319,6 +1393,7 @@
struct ip_vs_iphdr ciph;
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
+ struct ip_vs_proto_data *pd;
unsigned int offset, verdict;
union nf_inet_addr snet;
struct rt6_info *rt;
@@ -1361,9 +1436,11 @@
if (cih == NULL)
return NF_ACCEPT; /* The packet looks wrong, ignore */
- pp = ip_vs_proto_get(cih->nexthdr);
- if (!pp)
+ net = skb_net(skb);
+ pd = ip_vs_proto_data_get(net, cih->nexthdr);
+ if (!pd)
return NF_ACCEPT;
+ pp = pd->pp;
/* Is the embedded protocol header present? */
/* TODO: we don't support fragmentation at the moment anyways */
@@ -1377,10 +1454,10 @@
ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
/* The embedded headers contain source and dest in reverse order */
- cp = pp->conn_in_get(AF_INET6, skb, pp, &ciph, offset, 1);
+ cp = pp->conn_in_get(AF_INET6, skb, &ciph, offset, 1);
if (!cp) {
/* The packet could also belong to a local client */
- cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
+ cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1);
if (cp) {
ipv6_addr_copy(&snet.in6, &iph->saddr);
return handle_response_icmp(AF_INET6, skb, &snet,
@@ -1423,10 +1500,13 @@
static unsigned int
ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
{
+ struct net *net;
struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp;
+ struct ip_vs_proto_data *pd;
struct ip_vs_conn *cp;
int ret, restart, pkts;
+ struct netns_ipvs *ipvs;
/* Already marked as IPVS request or reply? */
if (skb->ipvs_property)
@@ -1480,20 +1560,21 @@
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
}
+ net = skb_net(skb);
/* Protocol supported? */
- pp = ip_vs_proto_get(iph.protocol);
- if (unlikely(!pp))
+ pd = ip_vs_proto_data_get(net, iph.protocol);
+ if (unlikely(!pd))
return NF_ACCEPT;
-
+ pp = pd->pp;
/*
* Check if the packet belongs to an existing connection entry
*/
- cp = pp->conn_in_get(af, skb, pp, &iph, iph.len, 0);
+ cp = pp->conn_in_get(af, skb, &iph, iph.len, 0);
if (unlikely(!cp)) {
int v;
- if (!pp->conn_schedule(af, skb, pp, &v, &cp))
+ if (!pp->conn_schedule(af, skb, pd, &v, &cp))
return v;
}
@@ -1505,12 +1586,13 @@
}
IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet");
-
+ net = skb_net(skb);
+ ipvs = net_ipvs(net);
/* Check the server status */
if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
/* the destination server is not available */
- if (sysctl_ip_vs_expire_nodest_conn) {
+ if (ipvs->sysctl_expire_nodest_conn) {
/* try to expire the connection immediately */
ip_vs_conn_expire_now(cp);
}
@@ -1521,7 +1603,7 @@
}
ip_vs_in_stats(cp, skb);
- restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);
+ restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
if (cp->packet_xmit)
ret = cp->packet_xmit(skb, cp, pp);
/* do not touch skb anymore */
@@ -1535,35 +1617,41 @@
*
* Sync connection if it is about to close to
* encourage the standby servers to update the connection timeouts
+ *
+ * For ONE_PKT let ip_vs_sync_conn() do the filter work.
*/
- pkts = atomic_add_return(1, &cp->in_pkts);
- if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+
+ if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+ pkts = ipvs->sysctl_sync_threshold[0];
+ else
+ pkts = atomic_add_return(1, &cp->in_pkts);
+
+ if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
cp->protocol == IPPROTO_SCTP) {
if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
- (pkts % sysctl_ip_vs_sync_threshold[1]
- == sysctl_ip_vs_sync_threshold[0])) ||
+ (pkts % ipvs->sysctl_sync_threshold[1]
+ == ipvs->sysctl_sync_threshold[0])) ||
(cp->old_state != cp->state &&
((cp->state == IP_VS_SCTP_S_CLOSED) ||
(cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
(cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) {
- ip_vs_sync_conn(cp);
+ ip_vs_sync_conn(net, cp);
goto out;
}
}
/* Keep this block last: TCP and others with pp->num_states <= 1 */
- else if (af == AF_INET &&
- (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+ else if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
(((cp->protocol != IPPROTO_TCP ||
cp->state == IP_VS_TCP_S_ESTABLISHED) &&
- (pkts % sysctl_ip_vs_sync_threshold[1]
- == sysctl_ip_vs_sync_threshold[0])) ||
+ (pkts % ipvs->sysctl_sync_threshold[1]
+ == ipvs->sysctl_sync_threshold[0])) ||
((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
(cp->state == IP_VS_TCP_S_CLOSE) ||
(cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
(cp->state == IP_VS_TCP_S_TIME_WAIT)))))
- ip_vs_sync_conn(cp);
+ ip_vs_sync_conn(net, cp);
out:
cp->old_state = cp->state;
@@ -1782,7 +1870,41 @@
},
#endif
};
+/*
+ * Initialize IP Virtual Server netns mem.
+ */
+static int __net_init __ip_vs_init(struct net *net)
+{
+ struct netns_ipvs *ipvs;
+ ipvs = net_generic(net, ip_vs_net_id);
+ if (ipvs == NULL) {
+ pr_err("%s(): no memory.\n", __func__);
+ return -ENOMEM;
+ }
+ ipvs->net = net;
+ /* Counters used for creating unique names */
+ ipvs->gen = atomic_read(&ipvs_netns_cnt);
+ atomic_inc(&ipvs_netns_cnt);
+ net->ipvs = ipvs;
+ printk(KERN_INFO "IPVS: Creating netns size=%lu id=%d\n",
+ sizeof(struct netns_ipvs), ipvs->gen);
+ return 0;
+}
+
+static void __net_exit __ip_vs_cleanup(struct net *net)
+{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ IP_VS_DBG(10, "ipvs netns %d released\n", ipvs->gen);
+}
+
+static struct pernet_operations ipvs_core_ops = {
+ .init = __ip_vs_init,
+ .exit = __ip_vs_cleanup,
+ .id = &ip_vs_net_id,
+ .size = sizeof(struct netns_ipvs),
+};
/*
* Initialize IP Virtual Server
@@ -1791,8 +1913,11 @@
{
int ret;
- ip_vs_estimator_init();
+ ret = register_pernet_subsys(&ipvs_core_ops); /* Alloc ip_vs struct */
+ if (ret < 0)
+ return ret;
+ ip_vs_estimator_init();
ret = ip_vs_control_init();
if (ret < 0) {
pr_err("can't setup control.\n");
@@ -1813,15 +1938,23 @@
goto cleanup_app;
}
+ ret = ip_vs_sync_init();
+ if (ret < 0) {
+ pr_err("can't setup sync data.\n");
+ goto cleanup_conn;
+ }
+
ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
if (ret < 0) {
pr_err("can't register hooks.\n");
- goto cleanup_conn;
+ goto cleanup_sync;
}
pr_info("ipvs loaded.\n");
return ret;
+cleanup_sync:
+ ip_vs_sync_cleanup();
cleanup_conn:
ip_vs_conn_cleanup();
cleanup_app:
@@ -1831,17 +1964,20 @@
ip_vs_control_cleanup();
cleanup_estimator:
ip_vs_estimator_cleanup();
+ unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */
return ret;
}
static void __exit ip_vs_cleanup(void)
{
nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
+ ip_vs_sync_cleanup();
ip_vs_conn_cleanup();
ip_vs_app_cleanup();
ip_vs_protocol_cleanup();
ip_vs_control_cleanup();
ip_vs_estimator_cleanup();
+ unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */
pr_info("ipvs unloaded.\n");
}
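The stats hot path in ip_vs_core.c drops three nested spinlocks in favour of per-cpu counters; the 64-bit byte counts are bracketed by u64_stats_update_begin()/end() so that readers on 32-bit hosts can detect and retry a torn read. A self-contained sketch of one such counter (struct and function names are illustrative):

#include <linux/percpu.h>
#include <linux/u64_stats_sync.h>

struct my_cpu_stats {
        u64                     inbytes;
        u64                     inpkts;
        struct u64_stats_sync   syncp;
};

static struct my_cpu_stats __percpu *my_stats;  /* alloc_percpu() at init */

/* Called in softirq context, so this_cpu_ptr() needs no extra locking. */
static void my_in_stats(unsigned int len)
{
        struct my_cpu_stats *s = this_cpu_ptr(my_stats);

        s->inpkts++;
        u64_stats_update_begin(&s->syncp);
        s->inbytes += len;
        u64_stats_update_end(&s->syncp);
}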
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 22f7ad5..09ca2ce 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -38,6 +38,7 @@
#include <linux/mutex.h>
#include <net/net_namespace.h>
+#include <linux/nsproxy.h>
#include <net/ip.h>
#ifdef CONFIG_IP_VS_IPV6
#include <net/ipv6.h>
@@ -57,42 +58,7 @@
/* lock for service table */
static DEFINE_RWLOCK(__ip_vs_svc_lock);
-/* lock for table with the real services */
-static DEFINE_RWLOCK(__ip_vs_rs_lock);
-
-/* lock for state and timeout tables */
-static DEFINE_SPINLOCK(ip_vs_securetcp_lock);
-
-/* lock for drop entry handling */
-static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
-
-/* lock for drop packet handling */
-static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
-
-/* 1/rate drop and drop-entry variables */
-int ip_vs_drop_rate = 0;
-int ip_vs_drop_counter = 0;
-static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
-
-/* number of virtual services */
-static int ip_vs_num_services = 0;
-
/* sysctl variables */
-static int sysctl_ip_vs_drop_entry = 0;
-static int sysctl_ip_vs_drop_packet = 0;
-static int sysctl_ip_vs_secure_tcp = 0;
-static int sysctl_ip_vs_amemthresh = 1024;
-static int sysctl_ip_vs_am_droprate = 10;
-int sysctl_ip_vs_cache_bypass = 0;
-int sysctl_ip_vs_expire_nodest_conn = 0;
-int sysctl_ip_vs_expire_quiescent_template = 0;
-int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
-int sysctl_ip_vs_nat_icmp_send = 0;
-#ifdef CONFIG_IP_VS_NFCT
-int sysctl_ip_vs_conntrack;
-#endif
-int sysctl_ip_vs_snat_reroute = 1;
-
#ifdef CONFIG_IP_VS_DEBUG
static int sysctl_ip_vs_debug_level = 0;
@@ -105,7 +71,8 @@
#ifdef CONFIG_IP_VS_IPV6
/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
-static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
+static int __ip_vs_addr_is_local_v6(struct net *net,
+ const struct in6_addr *addr)
{
struct rt6_info *rt;
struct flowi fl = {
@@ -114,7 +81,7 @@
.fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
};
- rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
+ rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl);
if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
return 1;
@@ -125,7 +92,7 @@
* update_defense_level is called from keventd and from sysctl,
* so it needs to protect itself from softirqs
*/
-static void update_defense_level(void)
+static void update_defense_level(struct netns_ipvs *ipvs)
{
struct sysinfo i;
static int old_secure_tcp = 0;
@@ -141,73 +108,73 @@
/* si_swapinfo(&i); */
/* availmem = availmem - (i.totalswap - i.freeswap); */
- nomem = (availmem < sysctl_ip_vs_amemthresh);
+ nomem = (availmem < ipvs->sysctl_amemthresh);
local_bh_disable();
/* drop_entry */
- spin_lock(&__ip_vs_dropentry_lock);
- switch (sysctl_ip_vs_drop_entry) {
+ spin_lock(&ipvs->dropentry_lock);
+ switch (ipvs->sysctl_drop_entry) {
case 0:
- atomic_set(&ip_vs_dropentry, 0);
+ atomic_set(&ipvs->dropentry, 0);
break;
case 1:
if (nomem) {
- atomic_set(&ip_vs_dropentry, 1);
- sysctl_ip_vs_drop_entry = 2;
+ atomic_set(&ipvs->dropentry, 1);
+ ipvs->sysctl_drop_entry = 2;
} else {
- atomic_set(&ip_vs_dropentry, 0);
+ atomic_set(&ipvs->dropentry, 0);
}
break;
case 2:
if (nomem) {
- atomic_set(&ip_vs_dropentry, 1);
+ atomic_set(&ipvs->dropentry, 1);
} else {
- atomic_set(&ip_vs_dropentry, 0);
- sysctl_ip_vs_drop_entry = 1;
+ atomic_set(&ipvs->dropentry, 0);
+ ipvs->sysctl_drop_entry = 1;
};
break;
case 3:
- atomic_set(&ip_vs_dropentry, 1);
+ atomic_set(&ipvs->dropentry, 1);
break;
}
- spin_unlock(&__ip_vs_dropentry_lock);
+ spin_unlock(&ipvs->dropentry_lock);
/* drop_packet */
- spin_lock(&__ip_vs_droppacket_lock);
- switch (sysctl_ip_vs_drop_packet) {
+ spin_lock(&ipvs->droppacket_lock);
+ switch (ipvs->sysctl_drop_packet) {
case 0:
- ip_vs_drop_rate = 0;
+ ipvs->drop_rate = 0;
break;
case 1:
if (nomem) {
- ip_vs_drop_rate = ip_vs_drop_counter
- = sysctl_ip_vs_amemthresh /
- (sysctl_ip_vs_amemthresh-availmem);
- sysctl_ip_vs_drop_packet = 2;
+ ipvs->drop_rate = ipvs->drop_counter
+ = ipvs->sysctl_amemthresh /
+ (ipvs->sysctl_amemthresh-availmem);
+ ipvs->sysctl_drop_packet = 2;
} else {
- ip_vs_drop_rate = 0;
+ ipvs->drop_rate = 0;
}
break;
case 2:
if (nomem) {
- ip_vs_drop_rate = ip_vs_drop_counter
- = sysctl_ip_vs_amemthresh /
- (sysctl_ip_vs_amemthresh-availmem);
+ ipvs->drop_rate = ipvs->drop_counter
+ = ipvs->sysctl_amemthresh /
+ (ipvs->sysctl_amemthresh-availmem);
} else {
- ip_vs_drop_rate = 0;
- sysctl_ip_vs_drop_packet = 1;
+ ipvs->drop_rate = 0;
+ ipvs->sysctl_drop_packet = 1;
}
break;
case 3:
- ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
+ ipvs->drop_rate = ipvs->sysctl_am_droprate;
break;
}
- spin_unlock(&__ip_vs_droppacket_lock);
+ spin_unlock(&ipvs->droppacket_lock);
/* secure_tcp */
- spin_lock(&ip_vs_securetcp_lock);
- switch (sysctl_ip_vs_secure_tcp) {
+ spin_lock(&ipvs->securetcp_lock);
+ switch (ipvs->sysctl_secure_tcp) {
case 0:
if (old_secure_tcp >= 2)
to_change = 0;
@@ -216,7 +183,7 @@
if (nomem) {
if (old_secure_tcp < 2)
to_change = 1;
- sysctl_ip_vs_secure_tcp = 2;
+ ipvs->sysctl_secure_tcp = 2;
} else {
if (old_secure_tcp >= 2)
to_change = 0;
@@ -229,7 +196,7 @@
} else {
if (old_secure_tcp >= 2)
to_change = 0;
- sysctl_ip_vs_secure_tcp = 1;
+ ipvs->sysctl_secure_tcp = 1;
}
break;
case 3:
@@ -237,10 +204,11 @@
to_change = 1;
break;
}
- old_secure_tcp = sysctl_ip_vs_secure_tcp;
+ old_secure_tcp = ipvs->sysctl_secure_tcp;
if (to_change >= 0)
- ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
- spin_unlock(&ip_vs_securetcp_lock);
+ ip_vs_protocol_timeout_change(ipvs,
+ ipvs->sysctl_secure_tcp > 1);
+ spin_unlock(&ipvs->securetcp_lock);
local_bh_enable();
}
@@ -250,16 +218,16 @@
* Timer for checking the defense
*/
#define DEFENSE_TIMER_PERIOD 1*HZ
-static void defense_work_handler(struct work_struct *work);
-static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
static void defense_work_handler(struct work_struct *work)
{
- update_defense_level();
- if (atomic_read(&ip_vs_dropentry))
- ip_vs_random_dropentry();
+ struct netns_ipvs *ipvs =
+ container_of(work, struct netns_ipvs, defense_work.work);
- schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
+ update_defense_level(ipvs);
+ if (atomic_read(&ipvs->dropentry))
+ ip_vs_random_dropentry(ipvs->net);
+ schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
}
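
With the work item embedded in struct netns_ipvs, the handler recovers its namespace context with container_of() instead of reading globals, and re-arms only its own timer. The same pattern in isolation, with hypothetical names:

	#include <linux/workqueue.h>
	#include <linux/jiffies.h>

	struct example_state {
		struct delayed_work work;
		int level;
	};

	static void example_work_handler(struct work_struct *work)
	{
		/* 'work' is &example_state.work.work; step back to the wrapper */
		struct example_state *st =
			container_of(work, struct example_state, work.work);

		st->level++;				/* per-instance data */
		schedule_delayed_work(&st->work, HZ);	/* re-arm ourselves */
	}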
int
@@ -287,33 +255,13 @@
/* the service table hashed by fwmark */
static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
-/*
- * Hash table: for real service lookups
- */
-#define IP_VS_RTAB_BITS 4
-#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
-#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
-
-static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
-
-/*
- * Trash for destinations
- */
-static LIST_HEAD(ip_vs_dest_trash);
-
-/*
- * FTP & NULL virtual service counters
- */
-static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
-static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
-
/*
* Returns hash value for virtual service
*/
-static __inline__ unsigned
-ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
- __be16 port)
+static inline unsigned
+ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
+ const union nf_inet_addr *addr, __be16 port)
{
register unsigned porth = ntohs(port);
__be32 addr_fold = addr->ip;
@@ -323,6 +271,7 @@
addr_fold = addr->ip6[0]^addr->ip6[1]^
addr->ip6[2]^addr->ip6[3];
#endif
+ addr_fold ^= ((size_t)net>>8);
return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
& IP_VS_SVC_TAB_MASK;
@@ -331,13 +280,13 @@
/*
* Returns hash value of fwmark for virtual service lookup
*/
-static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
+static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
{
- return fwmark & IP_VS_SVC_TAB_MASK;
+ return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
}
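
Both keys mix the namespace in with ((size_t)net >> 8): a struct net is slab-allocated and aligned, so the low bits of its address carry little entropy; shifting them off before the XOR spreads services from different namespaces across the buckets without needing a dedicated netns id. The folding step in isolation (hypothetical helper):

	/* Hypothetical: fold an aligned object pointer into a table index. */
	static inline unsigned example_hashkey(const void *net, __u32 key,
					       unsigned mask)
	{
		return (((size_t)net >> 8) ^ key) & mask;
	}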
/*
- * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
+ * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
* or in the ip_vs_svc_fwm_table by fwmark.
* Should be called with locked tables.
*/
@@ -353,16 +302,16 @@
if (svc->fwmark == 0) {
/*
- * Hash it by <protocol,addr,port> in ip_vs_svc_table
+ * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
*/
- hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
- svc->port);
+ hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
+ &svc->addr, svc->port);
list_add(&svc->s_list, &ip_vs_svc_table[hash]);
} else {
/*
- * Hash it by fwmark in ip_vs_svc_fwm_table
+ * Hash it by fwmark in svc_fwm_table
*/
- hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
+ hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
}
@@ -374,7 +323,7 @@
/*
- * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
+ * Unhashes a service from svc_table / svc_fwm_table.
* Should be called with locked tables.
*/
static int ip_vs_svc_unhash(struct ip_vs_service *svc)
@@ -386,10 +335,10 @@
}
if (svc->fwmark == 0) {
- /* Remove it from the ip_vs_svc_table table */
+		/* Remove it from the svc_table */
list_del(&svc->s_list);
} else {
- /* Remove it from the ip_vs_svc_fwm_table table */
+		/* Remove it from the svc_fwm_table */
list_del(&svc->f_list);
}
@@ -400,23 +349,24 @@
/*
- * Get service by {proto,addr,port} in the service table.
+ * Get service by {netns, proto, addr, port} in the service table.
*/
static inline struct ip_vs_service *
-__ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr,
- __be16 vport)
+__ip_vs_service_find(struct net *net, int af, __u16 protocol,
+ const union nf_inet_addr *vaddr, __be16 vport)
{
unsigned hash;
struct ip_vs_service *svc;
/* Check for "full" addressed entries */
- hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
+ hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
if ((svc->af == af)
&& ip_vs_addr_equal(af, &svc->addr, vaddr)
&& (svc->port == vport)
- && (svc->protocol == protocol)) {
+ && (svc->protocol == protocol)
+ && net_eq(svc->net, net)) {
/* HIT */
return svc;
}
@@ -430,16 +380,17 @@
* Get service by {fwmark} in the service table.
*/
static inline struct ip_vs_service *
-__ip_vs_svc_fwm_find(int af, __u32 fwmark)
+__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
{
unsigned hash;
struct ip_vs_service *svc;
/* Check for fwmark addressed entries */
- hash = ip_vs_svc_fwm_hashkey(fwmark);
+ hash = ip_vs_svc_fwm_hashkey(net, fwmark);
list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
- if (svc->fwmark == fwmark && svc->af == af) {
+ if (svc->fwmark == fwmark && svc->af == af
+ && net_eq(svc->net, net)) {
/* HIT */
return svc;
}
@@ -449,42 +400,44 @@
}
struct ip_vs_service *
-ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
+ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
const union nf_inet_addr *vaddr, __be16 vport)
{
struct ip_vs_service *svc;
+ struct netns_ipvs *ipvs = net_ipvs(net);
read_lock(&__ip_vs_svc_lock);
/*
* Check the table hashed by fwmark first
*/
- if (fwmark && (svc = __ip_vs_svc_fwm_find(af, fwmark)))
+ svc = __ip_vs_svc_fwm_find(net, af, fwmark);
+ if (fwmark && svc)
goto out;
/*
* Check the table hashed by <protocol,addr,port>
* for "full" addressed entries
*/
- svc = __ip_vs_service_find(af, protocol, vaddr, vport);
+ svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
if (svc == NULL
&& protocol == IPPROTO_TCP
- && atomic_read(&ip_vs_ftpsvc_counter)
+ && atomic_read(&ipvs->ftpsvc_counter)
&& (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
/*
* Check if ftp service entry exists, the packet
* might belong to FTP data connections.
*/
- svc = __ip_vs_service_find(af, protocol, vaddr, FTPPORT);
+ svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
}
if (svc == NULL
- && atomic_read(&ip_vs_nullsvc_counter)) {
+ && atomic_read(&ipvs->nullsvc_counter)) {
/*
* Check if the catch-all port (port zero) exists
*/
- svc = __ip_vs_service_find(af, protocol, vaddr, 0);
+ svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
}
out:
@@ -519,6 +472,7 @@
svc->fwmark,
IP_VS_DBG_ADDR(svc->af, &svc->addr),
ntohs(svc->port), atomic_read(&svc->usecnt));
+ free_percpu(svc->stats.cpustats);
kfree(svc);
}
}
@@ -545,10 +499,10 @@
}
/*
- * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
+ * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
* should be called with locked tables.
*/
-static int ip_vs_rs_hash(struct ip_vs_dest *dest)
+static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
{
unsigned hash;
@@ -562,19 +516,19 @@
*/
hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
- list_add(&dest->d_list, &ip_vs_rtable[hash]);
+ list_add(&dest->d_list, &ipvs->rs_table[hash]);
return 1;
}
/*
- * UNhashes ip_vs_dest from ip_vs_rtable.
+ * UNhashes ip_vs_dest from rs_table.
* should be called with locked tables.
*/
static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
{
/*
- * Remove it from the ip_vs_rtable table.
+	 * Remove it from the rs_table.
*/
if (!list_empty(&dest->d_list)) {
list_del(&dest->d_list);
@@ -588,10 +542,11 @@
* Lookup real service by <proto,addr,port> in the real service table.
*/
struct ip_vs_dest *
-ip_vs_lookup_real_service(int af, __u16 protocol,
+ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
const union nf_inet_addr *daddr,
__be16 dport)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
unsigned hash;
struct ip_vs_dest *dest;
@@ -601,19 +556,19 @@
*/
hash = ip_vs_rs_hashkey(af, daddr, dport);
- read_lock(&__ip_vs_rs_lock);
- list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
+ read_lock(&ipvs->rs_lock);
+ list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
if ((dest->af == af)
&& ip_vs_addr_equal(af, &dest->addr, daddr)
&& (dest->port == dport)
&& ((dest->protocol == protocol) ||
dest->vfwmark)) {
/* HIT */
- read_unlock(&__ip_vs_rs_lock);
+ read_unlock(&ipvs->rs_lock);
return dest;
}
}
- read_unlock(&__ip_vs_rs_lock);
+ read_unlock(&ipvs->rs_lock);
return NULL;
}
@@ -652,15 +607,16 @@
* ip_vs_lookup_real_service() looked promising, but
* seems not to work as expected.
*/
-struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
+struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
+ const union nf_inet_addr *daddr,
__be16 dport,
const union nf_inet_addr *vaddr,
- __be16 vport, __u16 protocol)
+ __be16 vport, __u16 protocol, __u32 fwmark)
{
struct ip_vs_dest *dest;
struct ip_vs_service *svc;
- svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
+ svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
if (!svc)
return NULL;
dest = ip_vs_lookup_dest(svc, daddr, dport);
@@ -685,11 +641,12 @@
__be16 dport)
{
struct ip_vs_dest *dest, *nxt;
+ struct netns_ipvs *ipvs = net_ipvs(svc->net);
/*
* Find the destination in trash
*/
- list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
+ list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
"dest->refcnt=%d\n",
dest->vfwmark,
@@ -720,6 +677,7 @@
list_del(&dest->n_list);
ip_vs_dst_reset(dest);
__ip_vs_unbind_svc(dest);
+ free_percpu(dest->stats.cpustats);
kfree(dest);
}
}
@@ -737,14 +695,16 @@
* are expired, and the refcnt of each destination in the trash must
* be 1, so we simply release them here.
*/
-static void ip_vs_trash_cleanup(void)
+static void ip_vs_trash_cleanup(struct net *net)
{
struct ip_vs_dest *dest, *nxt;
+ struct netns_ipvs *ipvs = net_ipvs(net);
- list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
+ list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
list_del(&dest->n_list);
ip_vs_dst_reset(dest);
__ip_vs_unbind_svc(dest);
+ free_percpu(dest->stats.cpustats);
kfree(dest);
}
}
@@ -768,6 +728,7 @@
__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
struct ip_vs_dest_user_kern *udest, int add)
{
+ struct netns_ipvs *ipvs = net_ipvs(svc->net);
int conn_flags;
/* set the weight and the flags */
@@ -780,12 +741,12 @@
conn_flags |= IP_VS_CONN_F_NOOUTPUT;
} else {
/*
- * Put the real service in ip_vs_rtable if not present.
+ * Put the real service in rs_table if not present.
* For now only for NAT!
*/
- write_lock_bh(&__ip_vs_rs_lock);
- ip_vs_rs_hash(dest);
- write_unlock_bh(&__ip_vs_rs_lock);
+ write_lock_bh(&ipvs->rs_lock);
+ ip_vs_rs_hash(ipvs, dest);
+ write_unlock_bh(&ipvs->rs_lock);
}
atomic_set(&dest->conn_flags, conn_flags);
@@ -813,7 +774,7 @@
spin_unlock(&dest->dst_lock);
if (add)
- ip_vs_new_estimator(&dest->stats);
+ ip_vs_new_estimator(svc->net, &dest->stats);
write_lock_bh(&__ip_vs_svc_lock);
@@ -850,12 +811,12 @@
atype = ipv6_addr_type(&udest->addr.in6);
if ((!(atype & IPV6_ADDR_UNICAST) ||
atype & IPV6_ADDR_LINKLOCAL) &&
- !__ip_vs_addr_is_local_v6(&udest->addr.in6))
+ !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
return -EINVAL;
} else
#endif
{
- atype = inet_addr_type(&init_net, udest->addr.ip);
+ atype = inet_addr_type(svc->net, udest->addr.ip);
if (atype != RTN_LOCAL && atype != RTN_UNICAST)
return -EINVAL;
}
@@ -865,6 +826,11 @@
pr_err("%s(): no memory.\n", __func__);
return -ENOMEM;
}
+ dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
+ if (!dest->stats.cpustats) {
+ pr_err("%s() alloc_percpu failed\n", __func__);
+ goto err_alloc;
+ }
dest->af = svc->af;
dest->protocol = svc->protocol;
@@ -888,6 +854,10 @@
LeaveFunction(2);
return 0;
+
+err_alloc:
+ kfree(dest);
+ return -ENOMEM;
}
@@ -1006,16 +976,18 @@
/*
* Delete a destination (must be already unlinked from the service)
*/
-static void __ip_vs_del_dest(struct ip_vs_dest *dest)
+static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
{
- ip_vs_kill_estimator(&dest->stats);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ ip_vs_kill_estimator(net, &dest->stats);
/*
* Remove it from the d-linked list with the real services.
*/
- write_lock_bh(&__ip_vs_rs_lock);
+ write_lock_bh(&ipvs->rs_lock);
ip_vs_rs_unhash(dest);
- write_unlock_bh(&__ip_vs_rs_lock);
+ write_unlock_bh(&ipvs->rs_lock);
/*
* Decrease the refcnt of the dest, and free the dest
@@ -1034,6 +1006,7 @@
and only one user context can update virtual service at a
time, so the operation here is OK */
atomic_dec(&dest->svc->refcnt);
+ free_percpu(dest->stats.cpustats);
kfree(dest);
} else {
IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
@@ -1041,7 +1014,7 @@
IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port),
atomic_read(&dest->refcnt));
- list_add(&dest->n_list, &ip_vs_dest_trash);
+ list_add(&dest->n_list, &ipvs->dest_trash);
atomic_inc(&dest->refcnt);
}
}
@@ -1105,7 +1078,7 @@
/*
* Delete the destination
*/
- __ip_vs_del_dest(dest);
+ __ip_vs_del_dest(svc->net, dest);
LeaveFunction(2);
@@ -1117,13 +1090,14 @@
* Add a service into the service hash table
*/
static int
-ip_vs_add_service(struct ip_vs_service_user_kern *u,
+ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
struct ip_vs_service **svc_p)
{
int ret = 0;
struct ip_vs_scheduler *sched = NULL;
struct ip_vs_pe *pe = NULL;
struct ip_vs_service *svc = NULL;
+ struct netns_ipvs *ipvs = net_ipvs(net);
/* increase the module use count */
ip_vs_use_count_inc();
@@ -1137,7 +1111,7 @@
}
if (u->pe_name && *u->pe_name) {
- pe = ip_vs_pe_get(u->pe_name);
+ pe = ip_vs_pe_getbyname(u->pe_name);
if (pe == NULL) {
pr_info("persistence engine module ip_vs_pe_%s "
"not found\n", u->pe_name);
@@ -1159,6 +1133,11 @@
ret = -ENOMEM;
goto out_err;
}
+	svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
+	if (!svc->stats.cpustats) {
+		pr_err("%s() alloc_percpu failed\n", __func__);
+		ret = -ENOMEM;
+		goto out_err;
+	}
/* I'm the first user of the service */
atomic_set(&svc->usecnt, 0);
@@ -1172,6 +1151,7 @@
svc->flags = u->flags;
svc->timeout = u->timeout * HZ;
svc->netmask = u->netmask;
+ svc->net = net;
INIT_LIST_HEAD(&svc->destinations);
rwlock_init(&svc->sched_lock);
@@ -1189,15 +1169,15 @@
/* Update the virtual service counters */
if (svc->port == FTPPORT)
- atomic_inc(&ip_vs_ftpsvc_counter);
+ atomic_inc(&ipvs->ftpsvc_counter);
else if (svc->port == 0)
- atomic_inc(&ip_vs_nullsvc_counter);
+ atomic_inc(&ipvs->nullsvc_counter);
- ip_vs_new_estimator(&svc->stats);
+ ip_vs_new_estimator(net, &svc->stats);
/* Count only IPv4 services for old get/setsockopt interface */
if (svc->af == AF_INET)
- ip_vs_num_services++;
+ ipvs->num_services++;
/* Hash the service into the service table */
write_lock_bh(&__ip_vs_svc_lock);
@@ -1207,6 +1187,7 @@
*svc_p = svc;
return 0;
+
out_err:
if (svc != NULL) {
ip_vs_unbind_scheduler(svc);
@@ -1215,6 +1196,8 @@
ip_vs_app_inc_put(svc->inc);
local_bh_enable();
}
+ if (svc->stats.cpustats)
+ free_percpu(svc->stats.cpustats);
kfree(svc);
}
ip_vs_scheduler_put(sched);
@@ -1248,7 +1231,7 @@
old_sched = sched;
if (u->pe_name && *u->pe_name) {
- pe = ip_vs_pe_get(u->pe_name);
+ pe = ip_vs_pe_getbyname(u->pe_name);
if (pe == NULL) {
pr_info("persistence engine module ip_vs_pe_%s "
"not found\n", u->pe_name);
@@ -1334,14 +1317,15 @@
struct ip_vs_dest *dest, *nxt;
struct ip_vs_scheduler *old_sched;
struct ip_vs_pe *old_pe;
+ struct netns_ipvs *ipvs = net_ipvs(svc->net);
pr_info("%s: enter\n", __func__);
/* Count only IPv4 services for old get/setsockopt interface */
if (svc->af == AF_INET)
- ip_vs_num_services--;
+ ipvs->num_services--;
- ip_vs_kill_estimator(&svc->stats);
+ ip_vs_kill_estimator(svc->net, &svc->stats);
/* Unbind scheduler */
old_sched = svc->scheduler;
@@ -1364,16 +1348,16 @@
*/
list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
__ip_vs_unlink_dest(svc, dest, 0);
- __ip_vs_del_dest(dest);
+ __ip_vs_del_dest(svc->net, dest);
}
/*
* Update the virtual service counters
*/
if (svc->port == FTPPORT)
- atomic_dec(&ip_vs_ftpsvc_counter);
+ atomic_dec(&ipvs->ftpsvc_counter);
else if (svc->port == 0)
- atomic_dec(&ip_vs_nullsvc_counter);
+ atomic_dec(&ipvs->nullsvc_counter);
/*
* Free the service if nobody refers to it
@@ -1383,6 +1367,7 @@
svc->fwmark,
IP_VS_DBG_ADDR(svc->af, &svc->addr),
ntohs(svc->port), atomic_read(&svc->usecnt));
+ free_percpu(svc->stats.cpustats);
kfree(svc);
}
@@ -1428,17 +1413,19 @@
/*
* Flush all the virtual services
*/
-static int ip_vs_flush(void)
+static int ip_vs_flush(struct net *net)
{
int idx;
struct ip_vs_service *svc, *nxt;
/*
- * Flush the service table hashed by <protocol,addr,port>
+ * Flush the service table hashed by <netns,protocol,addr,port>
*/
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
- list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
- ip_vs_unlink_service(svc);
+ list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
+ s_list) {
+ if (net_eq(svc->net, net))
+ ip_vs_unlink_service(svc);
}
}
@@ -1448,7 +1435,8 @@
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
list_for_each_entry_safe(svc, nxt,
&ip_vs_svc_fwm_table[idx], f_list) {
- ip_vs_unlink_service(svc);
+ if (net_eq(svc->net, net))
+ ip_vs_unlink_service(svc);
}
}
@@ -1472,24 +1460,26 @@
return 0;
}
-static int ip_vs_zero_all(void)
+static int ip_vs_zero_all(struct net *net)
{
int idx;
struct ip_vs_service *svc;
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
- ip_vs_zero_service(svc);
+ if (net_eq(svc->net, net))
+ ip_vs_zero_service(svc);
}
}
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
- ip_vs_zero_service(svc);
+ if (net_eq(svc->net, net))
+ ip_vs_zero_service(svc);
}
}
- ip_vs_zero_stats(&ip_vs_stats);
+ ip_vs_zero_stats(net_ipvs(net)->tot_stats);
return 0;
}
@@ -1498,6 +1488,7 @@
proc_do_defense_mode(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
+ struct net *net = current->nsproxy->net_ns;
int *valp = table->data;
int val = *valp;
int rc;
@@ -1508,7 +1499,7 @@
/* Restore the correct value */
*valp = val;
} else {
- update_defense_level();
+ update_defense_level(net_ipvs(net));
}
}
return rc;
@@ -1534,15 +1525,111 @@
return rc;
}
+static int
+proc_do_sync_mode(ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int *valp = table->data;
+ int val = *valp;
+ int rc;
+
+ rc = proc_dointvec(table, write, buffer, lenp, ppos);
+ if (write && (*valp != val)) {
+ if ((*valp < 0) || (*valp > 1)) {
+ /* Restore the correct value */
+ *valp = val;
+ } else {
+ struct net *net = current->nsproxy->net_ns;
+ ip_vs_sync_switch_mode(net, val);
+ }
+ }
+ return rc;
+}
/*
* IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
+ * Do not change order or insert new entries without
+ * aligning with the netns init in __ip_vs_control_init()
*/
static struct ctl_table vs_vars[] = {
{
.procname = "amemthresh",
- .data = &sysctl_ip_vs_amemthresh,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "am_droprate",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "drop_entry",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_do_defense_mode,
+ },
+ {
+ .procname = "drop_packet",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_do_defense_mode,
+ },
+#ifdef CONFIG_IP_VS_NFCT
+ {
+ .procname = "conntrack",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+		.proc_handler	= proc_dointvec,
+ },
+#endif
+ {
+ .procname = "secure_tcp",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_do_defense_mode,
+ },
+ {
+ .procname = "snat_reroute",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+		.proc_handler	= proc_dointvec,
+ },
+ {
+ .procname = "sync_version",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+		.proc_handler	= proc_do_sync_mode,
+ },
+ {
+ .procname = "cache_bypass",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "expire_nodest_conn",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "expire_quiescent_template",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "sync_threshold",
+ .maxlen =
+ sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
+ .mode = 0644,
+ .proc_handler = proc_do_sync_threshold,
+ },
+ {
+ .procname = "nat_icmp_send",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
@@ -1556,50 +1643,6 @@
.proc_handler = proc_dointvec,
},
#endif
- {
- .procname = "am_droprate",
- .data = &sysctl_ip_vs_am_droprate,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "drop_entry",
- .data = &sysctl_ip_vs_drop_entry,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_do_defense_mode,
- },
- {
- .procname = "drop_packet",
- .data = &sysctl_ip_vs_drop_packet,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_do_defense_mode,
- },
-#ifdef CONFIG_IP_VS_NFCT
- {
- .procname = "conntrack",
- .data = &sysctl_ip_vs_conntrack,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
-#endif
- {
- .procname = "secure_tcp",
- .data = &sysctl_ip_vs_secure_tcp,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_do_defense_mode,
- },
- {
- .procname = "snat_reroute",
- .data = &sysctl_ip_vs_snat_reroute,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
#if 0
{
.procname = "timeout_established",
@@ -1686,41 +1729,6 @@
.proc_handler = proc_dointvec_jiffies,
},
#endif
- {
- .procname = "cache_bypass",
- .data = &sysctl_ip_vs_cache_bypass,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "expire_nodest_conn",
- .data = &sysctl_ip_vs_expire_nodest_conn,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "expire_quiescent_template",
- .data = &sysctl_ip_vs_expire_quiescent_template,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "sync_threshold",
- .data = &sysctl_ip_vs_sync_threshold,
- .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
- .mode = 0644,
- .proc_handler = proc_do_sync_threshold,
- },
- {
- .procname = "nat_icmp_send",
- .data = &sysctl_ip_vs_nat_icmp_send,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
{ }
};
@@ -1732,11 +1740,10 @@
};
EXPORT_SYMBOL_GPL(net_vs_ctl_path);
-static struct ctl_table_header * sysctl_header;
-
#ifdef CONFIG_PROC_FS
struct ip_vs_iter {
+	struct seq_net_private p;  /* Do not move this, netns depends upon it */
struct list_head *table;
int bucket;
};
@@ -1763,6 +1770,7 @@
/* Get the Nth entry in the two lists */
static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
{
+ struct net *net = seq_file_net(seq);
struct ip_vs_iter *iter = seq->private;
int idx;
struct ip_vs_service *svc;
@@ -1770,7 +1778,7 @@
/* look in hash by protocol */
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
- if (pos-- == 0){
+ if (net_eq(svc->net, net) && pos-- == 0) {
iter->table = ip_vs_svc_table;
iter->bucket = idx;
return svc;
@@ -1781,7 +1789,7 @@
/* keep looking in fwmark */
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
- if (pos-- == 0) {
+ if (net_eq(svc->net, net) && pos-- == 0) {
iter->table = ip_vs_svc_fwm_table;
iter->bucket = idx;
return svc;
@@ -1935,7 +1943,7 @@
static int ip_vs_info_open(struct inode *inode, struct file *file)
{
- return seq_open_private(file, &ip_vs_info_seq_ops,
+ return seq_open_net(inode, file, &ip_vs_info_seq_ops,
sizeof(struct ip_vs_iter));
}
@@ -1949,13 +1957,11 @@
#endif
-struct ip_vs_stats ip_vs_stats = {
- .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
-};
-
#ifdef CONFIG_PROC_FS
static int ip_vs_stats_show(struct seq_file *seq, void *v)
{
+ struct net *net = seq_file_single_net(seq);
+ struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
seq_puts(seq,
@@ -1963,29 +1969,29 @@
seq_printf(seq,
" Conns Packets Packets Bytes Bytes\n");
- spin_lock_bh(&ip_vs_stats.lock);
- seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
- ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
- (unsigned long long) ip_vs_stats.ustats.inbytes,
- (unsigned long long) ip_vs_stats.ustats.outbytes);
+ spin_lock_bh(&tot_stats->lock);
+ seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns,
+ tot_stats->ustats.inpkts, tot_stats->ustats.outpkts,
+ (unsigned long long) tot_stats->ustats.inbytes,
+ (unsigned long long) tot_stats->ustats.outbytes);
/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
seq_puts(seq,
" Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
seq_printf(seq,"%8X %8X %8X %16X %16X\n",
- ip_vs_stats.ustats.cps,
- ip_vs_stats.ustats.inpps,
- ip_vs_stats.ustats.outpps,
- ip_vs_stats.ustats.inbps,
- ip_vs_stats.ustats.outbps);
- spin_unlock_bh(&ip_vs_stats.lock);
+ tot_stats->ustats.cps,
+ tot_stats->ustats.inpps,
+ tot_stats->ustats.outpps,
+ tot_stats->ustats.inbps,
+ tot_stats->ustats.outbps);
+ spin_unlock_bh(&tot_stats->lock);
return 0;
}
static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
{
- return single_open(file, ip_vs_stats_show, NULL);
+ return single_open_net(inode, file, ip_vs_stats_show);
}
static const struct file_operations ip_vs_stats_fops = {
@@ -1996,13 +2002,68 @@
.release = single_release,
};
+static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
+{
+ struct net *net = seq_file_single_net(seq);
+ struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
+ int i;
+
+/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
+ seq_puts(seq,
+ " Total Incoming Outgoing Incoming Outgoing\n");
+ seq_printf(seq,
+ "CPU Conns Packets Packets Bytes Bytes\n");
+
+ for_each_possible_cpu(i) {
+ struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i);
+ seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
+ i, u->ustats.conns, u->ustats.inpkts,
+ u->ustats.outpkts, (__u64)u->ustats.inbytes,
+ (__u64)u->ustats.outbytes);
+ }
+
+ spin_lock_bh(&tot_stats->lock);
+ seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
+ tot_stats->ustats.conns, tot_stats->ustats.inpkts,
+ tot_stats->ustats.outpkts,
+ (unsigned long long) tot_stats->ustats.inbytes,
+ (unsigned long long) tot_stats->ustats.outbytes);
+
+/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
+ seq_puts(seq,
+ " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
+ seq_printf(seq, " %8X %8X %8X %16X %16X\n",
+ tot_stats->ustats.cps,
+ tot_stats->ustats.inpps,
+ tot_stats->ustats.outpps,
+ tot_stats->ustats.inbps,
+ tot_stats->ustats.outbps);
+ spin_unlock_bh(&tot_stats->lock);
+
+ return 0;
+}
+
+static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
+{
+ return single_open_net(inode, file, ip_vs_stats_percpu_show);
+}
+
+static const struct file_operations ip_vs_stats_percpu_fops = {
+ .owner = THIS_MODULE,
+ .open = ip_vs_stats_percpu_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
#endif
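
All three proc files above lean on the net-aware seq_file helpers: seq_open_net() allocates the iterator with a leading struct seq_net_private recording which namespace's /proc the file was opened through (hence the "do not move" comment on struct ip_vs_iter), while single_open_net()/seq_file_single_net() play the same role for single-record files. The shape of the multi-record variant, with hypothetical names and the uninteresting seq callbacks elided:

	#include <linux/seq_file.h>
	#include <linux/seq_file_net.h>

	struct example_iter {
		struct seq_net_private p; /* must stay first for seq_file_net() */
		int bucket;
	};

	static int example_seq_show(struct seq_file *seq, void *v)
	{
		struct net *net = seq_file_net(seq);	/* opener's namespace */

		/* ... print only entries where net_eq(entry->net, net) ... */
		return 0;
	}

	static const struct seq_operations example_seq_ops = {
		/* .start/.next/.stop walk the table as usual */
		.show = example_seq_show,
	};

	static int example_open(struct inode *inode, struct file *file)
	{
		return seq_open_net(inode, file, &example_seq_ops,
				    sizeof(struct example_iter));
	}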
/*
* Set timeout values for tcp tcpfin udp in the timeout_table.
*/
-static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
+static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
{
+ struct ip_vs_proto_data *pd;
+
IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
u->tcp_timeout,
u->tcp_fin_timeout,
@@ -2010,19 +2071,22 @@
#ifdef CONFIG_IP_VS_PROTO_TCP
if (u->tcp_timeout) {
- ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
+ pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+ pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
= u->tcp_timeout * HZ;
}
if (u->tcp_fin_timeout) {
- ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
+ pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+ pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
= u->tcp_fin_timeout * HZ;
}
#endif
#ifdef CONFIG_IP_VS_PROTO_UDP
if (u->udp_timeout) {
- ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
+ pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+ pd->timeout_table[IP_VS_UDP_S_NORMAL]
= u->udp_timeout * HZ;
}
#endif
@@ -2087,6 +2151,7 @@
static int
do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
{
+ struct net *net = sock_net(sk);
int ret;
unsigned char arg[MAX_ARG_LEN];
struct ip_vs_service_user *usvc_compat;
@@ -2121,19 +2186,20 @@
if (cmd == IP_VS_SO_SET_FLUSH) {
/* Flush the virtual service */
- ret = ip_vs_flush();
+ ret = ip_vs_flush(net);
goto out_unlock;
} else if (cmd == IP_VS_SO_SET_TIMEOUT) {
/* Set timeout values for (tcp tcpfin udp) */
- ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
+ ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
goto out_unlock;
} else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
- ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
+ ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
+ dm->syncid);
goto out_unlock;
} else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
- ret = stop_sync_thread(dm->state);
+ ret = stop_sync_thread(net, dm->state);
goto out_unlock;
}
@@ -2148,7 +2214,7 @@
if (cmd == IP_VS_SO_SET_ZERO) {
/* if no service address is set, zero counters in all */
if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
- ret = ip_vs_zero_all();
+ ret = ip_vs_zero_all(net);
goto out_unlock;
}
}
@@ -2165,10 +2231,10 @@
/* Lookup the exact service by <protocol, addr, port> or fwmark */
if (usvc.fwmark == 0)
- svc = __ip_vs_service_find(usvc.af, usvc.protocol,
+ svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
&usvc.addr, usvc.port);
else
- svc = __ip_vs_svc_fwm_find(usvc.af, usvc.fwmark);
+ svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
if (cmd != IP_VS_SO_SET_ADD
&& (svc == NULL || svc->protocol != usvc.protocol)) {
@@ -2181,7 +2247,7 @@
if (svc != NULL)
ret = -EEXIST;
else
- ret = ip_vs_add_service(&usvc, &svc);
+ ret = ip_vs_add_service(net, &usvc, &svc);
break;
case IP_VS_SO_SET_EDIT:
ret = ip_vs_edit_service(svc, &usvc);
@@ -2241,7 +2307,8 @@
}
static inline int
-__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
+__ip_vs_get_service_entries(struct net *net,
+ const struct ip_vs_get_services *get,
struct ip_vs_get_services __user *uptr)
{
int idx, count=0;
@@ -2252,7 +2319,7 @@
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
/* Only expose IPv4 entries to old interface */
- if (svc->af != AF_INET)
+ if (svc->af != AF_INET || !net_eq(svc->net, net))
continue;
if (count >= get->num_services)
@@ -2271,7 +2338,7 @@
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
/* Only expose IPv4 entries to old interface */
- if (svc->af != AF_INET)
+ if (svc->af != AF_INET || !net_eq(svc->net, net))
continue;
if (count >= get->num_services)
@@ -2291,7 +2358,7 @@
}
static inline int
-__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
+__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
struct ip_vs_get_dests __user *uptr)
{
struct ip_vs_service *svc;
@@ -2299,9 +2366,9 @@
int ret = 0;
if (get->fwmark)
- svc = __ip_vs_svc_fwm_find(AF_INET, get->fwmark);
+ svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
else
- svc = __ip_vs_service_find(AF_INET, get->protocol, &addr,
+ svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
get->port);
if (svc) {
@@ -2336,17 +2403,19 @@
}
static inline void
-__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
+__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
{
+ struct ip_vs_proto_data *pd;
+
#ifdef CONFIG_IP_VS_PROTO_TCP
- u->tcp_timeout =
- ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
- u->tcp_fin_timeout =
- ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
+ pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+ u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
+ u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
#endif
#ifdef CONFIG_IP_VS_PROTO_UDP
+ pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
u->udp_timeout =
- ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
+ pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
#endif
}
@@ -2375,7 +2444,10 @@
unsigned char arg[128];
int ret = 0;
unsigned int copylen;
+ struct net *net = sock_net(sk);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ BUG_ON(!net);
if (!capable(CAP_NET_ADMIN))
return -EPERM;
@@ -2418,7 +2490,7 @@
struct ip_vs_getinfo info;
info.version = IP_VS_VERSION_CODE;
info.size = ip_vs_conn_tab_size;
- info.num_services = ip_vs_num_services;
+ info.num_services = ipvs->num_services;
if (copy_to_user(user, &info, sizeof(info)) != 0)
ret = -EFAULT;
}
@@ -2437,7 +2509,7 @@
ret = -EINVAL;
goto out;
}
- ret = __ip_vs_get_service_entries(get, user);
+ ret = __ip_vs_get_service_entries(net, get, user);
}
break;
@@ -2450,10 +2522,11 @@
entry = (struct ip_vs_service_entry *)arg;
addr.ip = entry->addr;
if (entry->fwmark)
- svc = __ip_vs_svc_fwm_find(AF_INET, entry->fwmark);
+ svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
else
- svc = __ip_vs_service_find(AF_INET, entry->protocol,
- &addr, entry->port);
+ svc = __ip_vs_service_find(net, AF_INET,
+ entry->protocol, &addr,
+ entry->port);
if (svc) {
ip_vs_copy_service(entry, svc);
if (copy_to_user(user, entry, sizeof(*entry)) != 0)
@@ -2476,7 +2549,7 @@
ret = -EINVAL;
goto out;
}
- ret = __ip_vs_get_dest_entries(get, user);
+ ret = __ip_vs_get_dest_entries(net, get, user);
}
break;
@@ -2484,7 +2557,7 @@
{
struct ip_vs_timeout_user t;
- __ip_vs_get_timeouts(&t);
+ __ip_vs_get_timeouts(net, &t);
if (copy_to_user(user, &t, sizeof(t)) != 0)
ret = -EFAULT;
}
@@ -2495,15 +2568,17 @@
struct ip_vs_daemon_user d[2];
memset(&d, 0, sizeof(d));
- if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
+ if (ipvs->sync_state & IP_VS_STATE_MASTER) {
d[0].state = IP_VS_STATE_MASTER;
- strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
- d[0].syncid = ip_vs_master_syncid;
+ strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
+ sizeof(d[0].mcast_ifn));
+ d[0].syncid = ipvs->master_syncid;
}
- if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
+ if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
d[1].state = IP_VS_STATE_BACKUP;
- strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
- d[1].syncid = ip_vs_backup_syncid;
+ strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
+ sizeof(d[1].mcast_ifn));
+ d[1].syncid = ipvs->backup_syncid;
}
if (copy_to_user(user, &d, sizeof(d)) != 0)
ret = -EFAULT;
@@ -2542,6 +2617,7 @@
.name = IPVS_GENL_NAME,
.version = IPVS_GENL_VERSION,
.maxattr = IPVS_CMD_MAX,
+	.netnsok	= true,		/* Make ipvsadm work on netns */
};
/* Policy used for first-level command attributes */
@@ -2696,11 +2772,12 @@
int idx = 0, i;
int start = cb->args[0];
struct ip_vs_service *svc;
+ struct net *net = skb_sknet(skb);
mutex_lock(&__ip_vs_mutex);
for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
- if (++idx <= start)
+ if (++idx <= start || !net_eq(svc->net, net))
continue;
if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
idx--;
@@ -2711,7 +2788,7 @@
for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
- if (++idx <= start)
+ if (++idx <= start || !net_eq(svc->net, net))
continue;
if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
idx--;
@@ -2727,7 +2804,8 @@
return skb->len;
}
-static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
+static int ip_vs_genl_parse_service(struct net *net,
+ struct ip_vs_service_user_kern *usvc,
struct nlattr *nla, int full_entry,
struct ip_vs_service **ret_svc)
{
@@ -2770,9 +2848,9 @@
}
if (usvc->fwmark)
- svc = __ip_vs_svc_fwm_find(usvc->af, usvc->fwmark);
+ svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
else
- svc = __ip_vs_service_find(usvc->af, usvc->protocol,
+ svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
&usvc->addr, usvc->port);
*ret_svc = svc;
@@ -2809,13 +2887,14 @@
return 0;
}
-static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
+static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
+ struct nlattr *nla)
{
struct ip_vs_service_user_kern usvc;
struct ip_vs_service *svc;
int ret;
- ret = ip_vs_genl_parse_service(&usvc, nla, 0, &svc);
+ ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
return ret ? ERR_PTR(ret) : svc;
}
@@ -2883,6 +2962,7 @@
struct ip_vs_service *svc;
struct ip_vs_dest *dest;
struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
+ struct net *net = skb_sknet(skb);
mutex_lock(&__ip_vs_mutex);
@@ -2891,7 +2971,8 @@
IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
goto out_err;
- svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
+
+ svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
if (IS_ERR(svc) || svc == NULL)
goto out_err;
@@ -3005,20 +3086,23 @@
static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
struct netlink_callback *cb)
{
+ struct net *net = skb_net(skb);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
mutex_lock(&__ip_vs_mutex);
- if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
+ if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
- ip_vs_master_mcast_ifn,
- ip_vs_master_syncid, cb) < 0)
+ ipvs->master_mcast_ifn,
+ ipvs->master_syncid, cb) < 0)
goto nla_put_failure;
cb->args[0] = 1;
}
- if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
+ if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
- ip_vs_backup_mcast_ifn,
- ip_vs_backup_syncid, cb) < 0)
+ ipvs->backup_mcast_ifn,
+ ipvs->backup_syncid, cb) < 0)
goto nla_put_failure;
cb->args[1] = 1;
@@ -3030,31 +3114,33 @@
return skb->len;
}
-static int ip_vs_genl_new_daemon(struct nlattr **attrs)
+static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
{
if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
return -EINVAL;
- return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
+ return start_sync_thread(net,
+ nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
}
-static int ip_vs_genl_del_daemon(struct nlattr **attrs)
+static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
{
if (!attrs[IPVS_DAEMON_ATTR_STATE])
return -EINVAL;
- return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
+ return stop_sync_thread(net,
+ nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
}
-static int ip_vs_genl_set_config(struct nlattr **attrs)
+static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
{
struct ip_vs_timeout_user t;
- __ip_vs_get_timeouts(&t);
+ __ip_vs_get_timeouts(net, &t);
if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
@@ -3066,7 +3152,7 @@
if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
- return ip_vs_set_timeout(&t);
+ return ip_vs_set_timeout(net, &t);
}
static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
@@ -3076,16 +3162,20 @@
struct ip_vs_dest_user_kern udest;
int ret = 0, cmd;
int need_full_svc = 0, need_full_dest = 0;
+ struct net *net;
+ struct netns_ipvs *ipvs;
+ net = skb_sknet(skb);
+ ipvs = net_ipvs(net);
cmd = info->genlhdr->cmd;
mutex_lock(&__ip_vs_mutex);
if (cmd == IPVS_CMD_FLUSH) {
- ret = ip_vs_flush();
+ ret = ip_vs_flush(net);
goto out;
} else if (cmd == IPVS_CMD_SET_CONFIG) {
- ret = ip_vs_genl_set_config(info->attrs);
+ ret = ip_vs_genl_set_config(net, info->attrs);
goto out;
} else if (cmd == IPVS_CMD_NEW_DAEMON ||
cmd == IPVS_CMD_DEL_DAEMON) {
@@ -3101,13 +3191,13 @@
}
if (cmd == IPVS_CMD_NEW_DAEMON)
- ret = ip_vs_genl_new_daemon(daemon_attrs);
+ ret = ip_vs_genl_new_daemon(net, daemon_attrs);
else
- ret = ip_vs_genl_del_daemon(daemon_attrs);
+ ret = ip_vs_genl_del_daemon(net, daemon_attrs);
goto out;
} else if (cmd == IPVS_CMD_ZERO &&
!info->attrs[IPVS_CMD_ATTR_SERVICE]) {
- ret = ip_vs_zero_all();
+ ret = ip_vs_zero_all(net);
goto out;
}
@@ -3117,7 +3207,7 @@
if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
need_full_svc = 1;
- ret = ip_vs_genl_parse_service(&usvc,
+ ret = ip_vs_genl_parse_service(net, &usvc,
info->attrs[IPVS_CMD_ATTR_SERVICE],
need_full_svc, &svc);
if (ret)
@@ -3147,7 +3237,7 @@
switch (cmd) {
case IPVS_CMD_NEW_SERVICE:
if (svc == NULL)
- ret = ip_vs_add_service(&usvc, &svc);
+ ret = ip_vs_add_service(net, &usvc, &svc);
else
ret = -EEXIST;
break;
@@ -3185,7 +3275,11 @@
struct sk_buff *msg;
void *reply;
int ret, cmd, reply_cmd;
+ struct net *net;
+ struct netns_ipvs *ipvs;
+ net = skb_sknet(skb);
+ ipvs = net_ipvs(net);
cmd = info->genlhdr->cmd;
if (cmd == IPVS_CMD_GET_SERVICE)
@@ -3214,7 +3308,8 @@
{
struct ip_vs_service *svc;
- svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
+ svc = ip_vs_genl_find_service(net,
+ info->attrs[IPVS_CMD_ATTR_SERVICE]);
if (IS_ERR(svc)) {
ret = PTR_ERR(svc);
goto out_err;
@@ -3234,7 +3329,7 @@
{
struct ip_vs_timeout_user t;
- __ip_vs_get_timeouts(&t);
+ __ip_vs_get_timeouts(net, &t);
#ifdef CONFIG_IP_VS_PROTO_TCP
NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
@@ -3380,62 +3475,172 @@
/* End of Generic Netlink interface definitions */
+/*
+ * per netns init/exit func.
+ */
+int __net_init __ip_vs_control_init(struct net *net)
+{
+ int idx;
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ struct ctl_table *tbl;
+
+ atomic_set(&ipvs->dropentry, 0);
+ spin_lock_init(&ipvs->dropentry_lock);
+ spin_lock_init(&ipvs->droppacket_lock);
+ spin_lock_init(&ipvs->securetcp_lock);
+ ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
+
+ /* Initialize rs_table */
+ for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
+ INIT_LIST_HEAD(&ipvs->rs_table[idx]);
+
+ INIT_LIST_HEAD(&ipvs->dest_trash);
+ atomic_set(&ipvs->ftpsvc_counter, 0);
+ atomic_set(&ipvs->nullsvc_counter, 0);
+
+ /* procfs stats */
+ ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL);
+ if (ipvs->tot_stats == NULL) {
+ pr_err("%s(): no memory.\n", __func__);
+ return -ENOMEM;
+ }
+ ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats);
+ if (!ipvs->cpustats) {
+ pr_err("%s() alloc_percpu failed\n", __func__);
+ goto err_alloc;
+ }
+ spin_lock_init(&ipvs->tot_stats->lock);
+
+ proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
+ proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
+ proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
+ &ip_vs_stats_percpu_fops);
+
+ if (!net_eq(net, &init_net)) {
+ tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
+ if (tbl == NULL)
+ goto err_dup;
+ } else
+ tbl = vs_vars;
+ /* Initialize sysctl defaults */
+ idx = 0;
+ ipvs->sysctl_amemthresh = 1024;
+ tbl[idx++].data = &ipvs->sysctl_amemthresh;
+ ipvs->sysctl_am_droprate = 10;
+ tbl[idx++].data = &ipvs->sysctl_am_droprate;
+ tbl[idx++].data = &ipvs->sysctl_drop_entry;
+ tbl[idx++].data = &ipvs->sysctl_drop_packet;
+#ifdef CONFIG_IP_VS_NFCT
+ tbl[idx++].data = &ipvs->sysctl_conntrack;
+#endif
+ tbl[idx++].data = &ipvs->sysctl_secure_tcp;
+ ipvs->sysctl_snat_reroute = 1;
+ tbl[idx++].data = &ipvs->sysctl_snat_reroute;
+ ipvs->sysctl_sync_ver = 1;
+ tbl[idx++].data = &ipvs->sysctl_sync_ver;
+ tbl[idx++].data = &ipvs->sysctl_cache_bypass;
+ tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
+ tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
+ ipvs->sysctl_sync_threshold[0] = 3;
+ ipvs->sysctl_sync_threshold[1] = 50;
+ tbl[idx].data = &ipvs->sysctl_sync_threshold;
+ tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
+ tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
+
+	ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
+						     tbl);
+ if (ipvs->sysctl_hdr == NULL)
+ goto err_reg;
+ ip_vs_new_estimator(net, ipvs->tot_stats);
+ ipvs->sysctl_tbl = tbl;
+ /* Schedule defense work */
+ INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
+ schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
+ return 0;
+
+err_reg:
+ if (!net_eq(net, &init_net))
+ kfree(tbl);
+err_dup:
+ free_percpu(ipvs->cpustats);
+err_alloc:
+ kfree(ipvs->tot_stats);
+ return -ENOMEM;
+}
+
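
The sysctl wiring above is the standard per-net duplication trick: every namespace except init_net gets its own kmemdup() of the template, each entry's .data pointer is re-aimed at the matching netns_ipvs field in strict declaration order (hence the ordering warning on vs_vars), and the duplicated table is what gets registered. Reduced to its skeleton, with all names hypothetical:

	#include <linux/slab.h>
	#include <linux/string.h>
	#include <linux/sysctl.h>
	#include <net/net_namespace.h>

	struct example_net {
		int knob;
		struct ctl_table_header *hdr;
	};

	static struct ctl_path example_ctl_path[] = {
		{ .procname = "net", }, { .procname = "example", }, { }
	};

	static struct ctl_table example_vars[] = {
		{ .procname = "knob", .maxlen = sizeof(int), .mode = 0644,
		  .proc_handler = proc_dointvec, },
		{ }
	};

	static int __net_init example_sysctl_init(struct net *net,
						  struct example_net *en)
	{
		struct ctl_table *tbl = example_vars;

		if (!net_eq(net, &init_net)) {
			tbl = kmemdup(example_vars, sizeof(example_vars),
				      GFP_KERNEL);
			if (!tbl)
				return -ENOMEM;
		}
		en->knob = 1;			/* per-net default */
		tbl[0].data = &en->knob;	/* order must match template */

		en->hdr = register_net_sysctl_table(net, example_ctl_path, tbl);
		if (!en->hdr) {
			if (!net_eq(net, &init_net))
				kfree(tbl);
			return -ENOMEM;
		}
		return 0;
	}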
+static void __net_exit __ip_vs_control_cleanup(struct net *net)
+{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ ip_vs_trash_cleanup(net);
+ ip_vs_kill_estimator(net, ipvs->tot_stats);
+ cancel_delayed_work_sync(&ipvs->defense_work);
+ cancel_work_sync(&ipvs->defense_work.work);
+ unregister_net_sysctl_table(ipvs->sysctl_hdr);
+ proc_net_remove(net, "ip_vs_stats_percpu");
+ proc_net_remove(net, "ip_vs_stats");
+ proc_net_remove(net, "ip_vs");
+ free_percpu(ipvs->cpustats);
+ kfree(ipvs->tot_stats);
+}
+
+static struct pernet_operations ipvs_control_ops = {
+ .init = __ip_vs_control_init,
+ .exit = __ip_vs_control_cleanup,
+};
int __init ip_vs_control_init(void)
{
- int ret;
int idx;
+ int ret;
EnterFunction(2);
- /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
+ /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
}
- for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
- INIT_LIST_HEAD(&ip_vs_rtable[idx]);
+
+ ret = register_pernet_subsys(&ipvs_control_ops);
+ if (ret) {
+ pr_err("cannot register namespace.\n");
+ goto err;
}
- smp_wmb();
+
+	smp_wmb();	/* Do we really need it now? */
ret = nf_register_sockopt(&ip_vs_sockopts);
if (ret) {
pr_err("cannot register sockopt.\n");
- return ret;
+ goto err_net;
}
ret = ip_vs_genl_register();
if (ret) {
pr_err("cannot register Generic Netlink interface.\n");
nf_unregister_sockopt(&ip_vs_sockopts);
- return ret;
+ goto err_net;
}
- proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
- proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
-
- sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
-
- ip_vs_new_estimator(&ip_vs_stats);
-
- /* Hook the defense timer */
- schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
-
LeaveFunction(2);
return 0;
+
+err_net:
+ unregister_pernet_subsys(&ipvs_control_ops);
+err:
+ return ret;
}
void ip_vs_control_cleanup(void)
{
EnterFunction(2);
- ip_vs_trash_cleanup();
- cancel_delayed_work_sync(&defense_work);
- cancel_work_sync(&defense_work.work);
- ip_vs_kill_estimator(&ip_vs_stats);
- unregister_sysctl_table(sysctl_header);
- proc_net_remove(&init_net, "ip_vs_stats");
- proc_net_remove(&init_net, "ip_vs");
+ unregister_pernet_subsys(&ipvs_control_ops);
ip_vs_genl_unregister();
nf_unregister_sockopt(&ip_vs_sockopts);
LeaveFunction(2);
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index ff28801..f560a05 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -8,8 +8,12 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
- * Changes:
- *
+ * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com>
+ * Network name space (netns) aware.
+ *              Global data moved to netns, i.e. struct netns_ipvs
+ *              Affected data: est_list and est_lock.
+ *              estimation_timer() runs with a timer per netns.
+ *              get_stats() does the per-cpu summing.
*/
#define KMSG_COMPONENT "IPVS"
@@ -48,11 +52,42 @@
*/
-static void estimation_timer(unsigned long arg);
+/*
+ * Make a summary from each cpu
+ */
+static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
+ struct ip_vs_cpu_stats *stats)
+{
+ int i;
-static LIST_HEAD(est_list);
-static DEFINE_SPINLOCK(est_lock);
-static DEFINE_TIMER(est_timer, estimation_timer, 0, 0);
+ for_each_possible_cpu(i) {
+ struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i);
+ unsigned int start;
+ __u64 inbytes, outbytes;
+ if (i) {
+ sum->conns += s->ustats.conns;
+ sum->inpkts += s->ustats.inpkts;
+ sum->outpkts += s->ustats.outpkts;
+ do {
+ start = u64_stats_fetch_begin_bh(&s->syncp);
+ inbytes = s->ustats.inbytes;
+ outbytes = s->ustats.outbytes;
+ } while (u64_stats_fetch_retry_bh(&s->syncp, start));
+ sum->inbytes += inbytes;
+ sum->outbytes += outbytes;
+ } else {
+ sum->conns = s->ustats.conns;
+ sum->inpkts = s->ustats.inpkts;
+ sum->outpkts = s->ustats.outpkts;
+ do {
+ start = u64_stats_fetch_begin_bh(&s->syncp);
+ sum->inbytes = s->ustats.inbytes;
+ sum->outbytes = s->ustats.outbytes;
+ } while (u64_stats_fetch_retry_bh(&s->syncp, start));
+ }
+ }
+}
+
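
The u64_stats_fetch_begin_bh()/retry loop exists for 32-bit machines, where a 64-bit byte counter cannot be read atomically: the writer brackets each update in a seqcount and the reader retries whenever a write overlapped the read. The writer half, which is not visible in this hunk, looks roughly like this (hypothetical names):

	#include <linux/u64_stats_sync.h>

	struct example_cpu_stats {
		u64 bytes;
		struct u64_stats_sync syncp;
	};

	static void example_account(struct example_cpu_stats *s,
				    unsigned int len)
	{
		u64_stats_update_begin(&s->syncp); /* mark write in progress */
		s->bytes += len;
		u64_stats_update_end(&s->syncp);   /* readers may fetch again */
	}

On 64-bit kernels both markers compile away, since the counter update is already a single store.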
static void estimation_timer(unsigned long arg)
{
@@ -62,11 +97,16 @@
u32 n_inpkts, n_outpkts;
u64 n_inbytes, n_outbytes;
u32 rate;
+ struct net *net = (struct net *)arg;
+ struct netns_ipvs *ipvs;
- spin_lock(&est_lock);
- list_for_each_entry(e, &est_list, list) {
+ ipvs = net_ipvs(net);
+ ip_vs_read_cpu_stats(&ipvs->tot_stats->ustats, ipvs->cpustats);
+ spin_lock(&ipvs->est_lock);
+ list_for_each_entry(e, &ipvs->est_list, list) {
s = container_of(e, struct ip_vs_stats, est);
+ ip_vs_read_cpu_stats(&s->ustats, s->cpustats);
spin_lock(&s->lock);
n_conns = s->ustats.conns;
n_inpkts = s->ustats.inpkts;
@@ -75,38 +115,39 @@
n_outbytes = s->ustats.outbytes;
/* scaled by 2^10, but divided 2 seconds */
- rate = (n_conns - e->last_conns)<<9;
+ rate = (n_conns - e->last_conns) << 9;
e->last_conns = n_conns;
- e->cps += ((long)rate - (long)e->cps)>>2;
- s->ustats.cps = (e->cps+0x1FF)>>10;
+ e->cps += ((long)rate - (long)e->cps) >> 2;
+ s->ustats.cps = (e->cps + 0x1FF) >> 10;
- rate = (n_inpkts - e->last_inpkts)<<9;
+ rate = (n_inpkts - e->last_inpkts) << 9;
e->last_inpkts = n_inpkts;
- e->inpps += ((long)rate - (long)e->inpps)>>2;
- s->ustats.inpps = (e->inpps+0x1FF)>>10;
+ e->inpps += ((long)rate - (long)e->inpps) >> 2;
+ s->ustats.inpps = (e->inpps + 0x1FF) >> 10;
- rate = (n_outpkts - e->last_outpkts)<<9;
+ rate = (n_outpkts - e->last_outpkts) << 9;
e->last_outpkts = n_outpkts;
- e->outpps += ((long)rate - (long)e->outpps)>>2;
- s->ustats.outpps = (e->outpps+0x1FF)>>10;
+ e->outpps += ((long)rate - (long)e->outpps) >> 2;
+ s->ustats.outpps = (e->outpps + 0x1FF) >> 10;
- rate = (n_inbytes - e->last_inbytes)<<4;
+ rate = (n_inbytes - e->last_inbytes) << 4;
e->last_inbytes = n_inbytes;
- e->inbps += ((long)rate - (long)e->inbps)>>2;
- s->ustats.inbps = (e->inbps+0xF)>>5;
+ e->inbps += ((long)rate - (long)e->inbps) >> 2;
+ s->ustats.inbps = (e->inbps + 0xF) >> 5;
- rate = (n_outbytes - e->last_outbytes)<<4;
+ rate = (n_outbytes - e->last_outbytes) << 4;
e->last_outbytes = n_outbytes;
- e->outbps += ((long)rate - (long)e->outbps)>>2;
- s->ustats.outbps = (e->outbps+0xF)>>5;
+ e->outbps += ((long)rate - (long)e->outbps) >> 2;
+ s->ustats.outbps = (e->outbps + 0xF) >> 5;
spin_unlock(&s->lock);
}
- spin_unlock(&est_lock);
- mod_timer(&est_timer, jiffies + 2*HZ);
+ spin_unlock(&ipvs->est_lock);
+ mod_timer(&ipvs->est_timer, jiffies + 2*HZ);
}
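
The loop body is a fixed-point exponentially weighted moving average, evaluated every 2 seconds. For the connection rate, with the names shortened:

	rate = (n - n_last) << 9;	/* n per 2 s  ==  (n per s) << 10 */
	avg += (rate - avg) >> 2;	/* avg = 0.75 * avg + 0.25 * rate */
	cps  = (avg + 0x1FF) >> 10;	/* round away the 2^10 scaling */

so a steady 100 conns/s converges on avg = 100 << 10 = 102400 and reports cps = 100. The byte rates use 2^5 scaling instead (<< 4 over the 2 s window, >> 5 with +0xF rounding) so the much larger byte counts still fit in a long.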
-void ip_vs_new_estimator(struct ip_vs_stats *stats)
+void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_estimator *est = &stats->est;
INIT_LIST_HEAD(&est->list);
@@ -126,18 +167,19 @@
est->last_outbytes = stats->ustats.outbytes;
est->outbps = stats->ustats.outbps<<5;
- spin_lock_bh(&est_lock);
- list_add(&est->list, &est_list);
- spin_unlock_bh(&est_lock);
+ spin_lock_bh(&ipvs->est_lock);
+ list_add(&est->list, &ipvs->est_list);
+ spin_unlock_bh(&ipvs->est_lock);
}
-void ip_vs_kill_estimator(struct ip_vs_stats *stats)
+void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_estimator *est = &stats->est;
- spin_lock_bh(&est_lock);
+ spin_lock_bh(&ipvs->est_lock);
list_del(&est->list);
- spin_unlock_bh(&est_lock);
+ spin_unlock_bh(&ipvs->est_lock);
}
void ip_vs_zero_estimator(struct ip_vs_stats *stats)
@@ -157,13 +199,35 @@
est->outbps = 0;
}
+static int __net_init __ip_vs_estimator_init(struct net *net)
+{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ INIT_LIST_HEAD(&ipvs->est_list);
+ spin_lock_init(&ipvs->est_lock);
+ setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)net);
+ mod_timer(&ipvs->est_timer, jiffies + 2 * HZ);
+ return 0;
+}
+
+static void __net_exit __ip_vs_estimator_exit(struct net *net)
+{
+ del_timer_sync(&net_ipvs(net)->est_timer);
+}
+
+static struct pernet_operations ip_vs_est_ops = {
+ .init = __ip_vs_estimator_init,
+ .exit = __ip_vs_estimator_exit,
+};
+
int __init ip_vs_estimator_init(void)
{
- mod_timer(&est_timer, jiffies + 2 * HZ);
- return 0;
+ int rv;
+
+	rv = register_pernet_subsys(&ip_vs_est_ops);
+ return rv;
}
void ip_vs_estimator_cleanup(void)
{
- del_timer_sync(&est_timer);
+	unregister_pernet_subsys(&ip_vs_est_ops);
}
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 7545500..6b5dd6d 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -157,6 +157,7 @@
int ret = 0;
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
+ struct net *net;
#ifdef CONFIG_IP_VS_IPV6
/* This application helper doesn't work with IPv6 yet,
@@ -197,18 +198,20 @@
*/
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(AF_INET, iph->protocol,
- &from, port, &cp->caddr, 0, &p);
+ ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,
+ iph->protocol, &from, port,
+ &cp->caddr, 0, &p);
n_cp = ip_vs_conn_out_get(&p);
}
if (!n_cp) {
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(AF_INET, IPPROTO_TCP, &cp->caddr,
+ ip_vs_conn_fill_param(ip_vs_conn_net(cp),
+ AF_INET, IPPROTO_TCP, &cp->caddr,
0, &cp->vaddr, port, &p);
n_cp = ip_vs_conn_new(&p, &from, port,
IP_VS_CONN_F_NO_CPORT |
IP_VS_CONN_F_NFCT,
- cp->dest);
+ cp->dest, skb->mark);
if (!n_cp)
return 0;
@@ -257,8 +260,9 @@
* would be adjusted twice.
*/
+ net = skb_net(skb);
cp->app_data = NULL;
- ip_vs_tcp_conn_listen(n_cp);
+ ip_vs_tcp_conn_listen(net, n_cp);
ip_vs_conn_put(n_cp);
return ret;
}
@@ -287,6 +291,7 @@
union nf_inet_addr to;
__be16 port;
struct ip_vs_conn *n_cp;
+ struct net *net;
#ifdef CONFIG_IP_VS_IPV6
/* This application helper doesn't work with IPv6 yet,
@@ -358,14 +363,15 @@
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(AF_INET, iph->protocol, &to, port,
- &cp->vaddr, htons(ntohs(cp->vport)-1),
- &p);
+ ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,
+ iph->protocol, &to, port, &cp->vaddr,
+ htons(ntohs(cp->vport)-1), &p);
n_cp = ip_vs_conn_in_get(&p);
if (!n_cp) {
n_cp = ip_vs_conn_new(&p, &cp->daddr,
htons(ntohs(cp->dport)-1),
- IP_VS_CONN_F_NFCT, cp->dest);
+ IP_VS_CONN_F_NFCT, cp->dest,
+ skb->mark);
if (!n_cp)
return 0;
@@ -377,7 +383,8 @@
/*
* Move tunnel to listen state
*/
- ip_vs_tcp_conn_listen(n_cp);
+ net = skb_net(skb);
+ ip_vs_tcp_conn_listen(net, n_cp);
ip_vs_conn_put(n_cp);
return 1;
@@ -398,23 +405,22 @@
.pkt_in = ip_vs_ftp_in,
};
-
/*
- * ip_vs_ftp initialization
+ * per netns ip_vs_ftp initialization
*/
-static int __init ip_vs_ftp_init(void)
+static int __net_init __ip_vs_ftp_init(struct net *net)
{
int i, ret;
struct ip_vs_app *app = &ip_vs_ftp;
- ret = register_ip_vs_app(app);
+ ret = register_ip_vs_app(net, app);
if (ret)
return ret;
for (i=0; i<IP_VS_APP_MAX_PORTS; i++) {
if (!ports[i])
continue;
- ret = register_ip_vs_app_inc(app, app->protocol, ports[i]);
+ ret = register_ip_vs_app_inc(net, app, app->protocol, ports[i]);
if (ret)
break;
pr_info("%s: loaded support on port[%d] = %d\n",
@@ -422,18 +428,39 @@
}
if (ret)
- unregister_ip_vs_app(app);
+ unregister_ip_vs_app(net, app);
return ret;
}
+/*
+ * netns exit
+ */
+static void __ip_vs_ftp_exit(struct net *net)
+{
+ struct ip_vs_app *app = &ip_vs_ftp;
+ unregister_ip_vs_app(net, app);
+}
+
+static struct pernet_operations ip_vs_ftp_ops = {
+ .init = __ip_vs_ftp_init,
+ .exit = __ip_vs_ftp_exit,
+};
+
+int __init ip_vs_ftp_init(void)
+{
+ int rv;
+
+ rv = register_pernet_subsys(&ip_vs_ftp_ops);
+ return rv;
+}
/*
* ip_vs_ftp finish.
*/
static void __exit ip_vs_ftp_exit(void)
{
- unregister_ip_vs_app(&ip_vs_ftp);
+ unregister_pernet_subsys(&ip_vs_ftp_ops);
}
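
The estimator and FTP conversions share one recipe: module-global state moves into a pernet .init/.exit pair, and module init shrinks to register_pernet_subsys(), which invokes .init for every existing namespace and for each one created afterwards. A minimal skeleton of that pattern; struct netns_foo and everything named foo_* are hypothetical, and net_generic() is shown as the generic alternative to the approach IPVS takes (embedding struct netns_ipvs directly in struct net):

#include <linux/module.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

/* hypothetical per-netns state */
struct netns_foo {
	struct list_head items;
	spinlock_t lock;
};

static int foo_net_id __read_mostly;

static int __net_init foo_net_init(struct net *net)
{
	struct netns_foo *f = net_generic(net, foo_net_id);

	INIT_LIST_HEAD(&f->items);
	spin_lock_init(&f->lock);
	return 0;
}

static void __net_exit foo_net_exit(struct net *net)
{
	/* undo foo_net_init() for this one namespace */
}

static struct pernet_operations foo_net_ops = {
	.init = foo_net_init,
	.exit = foo_net_exit,
	.id   = &foo_net_id,	/* lets net_generic() find our slot */
	.size = sizeof(struct netns_foo),
};

static int __init foo_init(void)
{
	return register_pernet_subsys(&foo_net_ops);
}

static void __exit foo_exit(void)
{
	unregister_pernet_subsys(&foo_net_ops);
}

module_init(foo_init);
module_exit(foo_exit);
MODULE_LICENSE("GPL");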
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 9323f89..d5bec33 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -70,7 +70,6 @@
* entries that haven't been touched for a day.
*/
#define COUNT_FOR_FULL_EXPIRATION 30
-static int sysctl_ip_vs_lblc_expiration = 24*60*60*HZ;
/*
@@ -117,7 +116,7 @@
static ctl_table vs_vars_table[] = {
{
.procname = "lblc_expiration",
- .data = &sysctl_ip_vs_lblc_expiration,
+ .data = NULL,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
@@ -125,8 +124,6 @@
{ }
};
-static struct ctl_table_header * sysctl_header;
-
static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
{
list_del(&en->list);
@@ -248,6 +245,7 @@
struct ip_vs_lblc_entry *en, *nxt;
unsigned long now = jiffies;
int i, j;
+ struct netns_ipvs *ipvs = net_ipvs(svc->net);
for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLC_TAB_MASK;
@@ -255,7 +253,8 @@
write_lock(&svc->sched_lock);
list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
if (time_before(now,
- en->lastuse + sysctl_ip_vs_lblc_expiration))
+ en->lastuse +
+ ipvs->sysctl_lblc_expiration))
continue;
ip_vs_lblc_free(en);
@@ -543,23 +542,73 @@
.schedule = ip_vs_lblc_schedule,
};
+/*
+ * per netns init.
+ */
+static int __net_init __ip_vs_lblc_init(struct net *net)
+{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ if (!net_eq(net, &init_net)) {
+ ipvs->lblc_ctl_table = kmemdup(vs_vars_table,
+ sizeof(vs_vars_table),
+ GFP_KERNEL);
+ if (ipvs->lblc_ctl_table == NULL)
+ goto err_dup;
+ } else
+ ipvs->lblc_ctl_table = vs_vars_table;
+ ipvs->sysctl_lblc_expiration = 24*60*60*HZ;
+ ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration;
+
+ ipvs->lblc_ctl_header =
+ register_net_sysctl_table(net, net_vs_ctl_path,
+ ipvs->lblc_ctl_table);
+ if (!ipvs->lblc_ctl_header)
+ goto err_reg;
+
+ return 0;
+
+err_reg:
+ if (!net_eq(net, &init_net))
+ kfree(ipvs->lblc_ctl_table);
+
+err_dup:
+ return -ENOMEM;
+}
+
+static void __net_exit __ip_vs_lblc_exit(struct net *net)
+{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ unregister_net_sysctl_table(ipvs->lblc_ctl_header);
+
+ if (!net_eq(net, &init_net))
+ kfree(ipvs->lblc_ctl_table);
+}
+
+static struct pernet_operations ip_vs_lblc_ops = {
+ .init = __ip_vs_lblc_init,
+ .exit = __ip_vs_lblc_exit,
+};
static int __init ip_vs_lblc_init(void)
{
int ret;
- sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
+ ret = register_pernet_subsys(&ip_vs_lblc_ops);
+ if (ret)
+ return ret;
+
ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler);
if (ret)
- unregister_sysctl_table(sysctl_header);
+ unregister_pernet_subsys(&ip_vs_lblc_ops);
return ret;
}
-
static void __exit ip_vs_lblc_cleanup(void)
{
- unregister_sysctl_table(sysctl_header);
unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
+ unregister_pernet_subsys(&ip_vs_lblc_ops);
}
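
Here, and in the identical lblcr conversion just below, a single file-scope sysctl becomes per-namespace state: every non-init netns gets a kmemdup() copy of the ctl_table, the copy's .data is repointed at the per-netns field, and the copy is registered with register_net_sysctl_table(); init_net keeps the static table to save an allocation. A hedged condensation of that pattern, with netns_foo and the foo_* names as placeholders:

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <net/net_namespace.h>

/* placeholder per-netns container; the real code keeps these
 * fields in struct netns_ipvs */
struct netns_foo {
	int sysctl_expiration;
	struct ctl_table *ctl_table;
	struct ctl_table_header *ctl_header;
};

static struct ctl_path foo_ctl_path[] = {
	{ .procname = "net" },
	{ .procname = "foo" },
	{ }
};

static struct ctl_table foo_vars_table[] = {
	{
		.procname	= "expiration",
		.data		= NULL,		/* set per netns below */
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{ }
};

static int __net_init foo_sysctl_init(struct net *net, struct netns_foo *f)
{
	/* init_net keeps the static table; every other netns gets a
	 * private copy so .data can point into its own netns_foo */
	if (!net_eq(net, &init_net)) {
		f->ctl_table = kmemdup(foo_vars_table,
				       sizeof(foo_vars_table), GFP_KERNEL);
		if (f->ctl_table == NULL)
			return -ENOMEM;
	} else
		f->ctl_table = foo_vars_table;

	f->sysctl_expiration = 24 * 60 * 60 * HZ;
	f->ctl_table[0].data = &f->sysctl_expiration;

	f->ctl_header = register_net_sysctl_table(net, foo_ctl_path,
						  f->ctl_table);
	if (f->ctl_header == NULL) {
		if (!net_eq(net, &init_net))
			kfree(f->ctl_table);
		return -ENOMEM;
	}
	return 0;
}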
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index dbeed8e..61ae8cf 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -70,8 +70,6 @@
* entries that haven't been touched for a day.
*/
#define COUNT_FOR_FULL_EXPIRATION 30
-static int sysctl_ip_vs_lblcr_expiration = 24*60*60*HZ;
-
/*
* for IPVS lblcr entry hash table
@@ -296,7 +294,7 @@
static ctl_table vs_vars_table[] = {
{
.procname = "lblcr_expiration",
- .data = &sysctl_ip_vs_lblcr_expiration,
+ .data = NULL,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
@@ -304,8 +302,6 @@
{ }
};
-static struct ctl_table_header * sysctl_header;
-
static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
{
list_del(&en->list);
@@ -425,14 +421,15 @@
unsigned long now = jiffies;
int i, j;
struct ip_vs_lblcr_entry *en, *nxt;
+ struct netns_ipvs *ipvs = net_ipvs(svc->net);
for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
write_lock(&svc->sched_lock);
list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
- if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration,
- now))
+ if (time_after(en->lastuse
+ + ipvs->sysctl_lblcr_expiration, now))
continue;
ip_vs_lblcr_free(en);
@@ -664,6 +661,7 @@
read_lock(&svc->sched_lock);
en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr);
if (en) {
+ struct netns_ipvs *ipvs = net_ipvs(svc->net);
/* We only hold a read lock, but this is atomic */
en->lastuse = jiffies;
@@ -675,7 +673,7 @@
/* More than one destination + enough time passed by, cleanup */
if (atomic_read(&en->set.size) > 1 &&
time_after(jiffies, en->set.lastmod +
- sysctl_ip_vs_lblcr_expiration)) {
+ ipvs->sysctl_lblcr_expiration)) {
struct ip_vs_dest *m;
write_lock(&en->set.lock);
@@ -744,23 +742,73 @@
.schedule = ip_vs_lblcr_schedule,
};
+/*
+ * per netns init.
+ */
+static int __net_init __ip_vs_lblcr_init(struct net *net)
+{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ if (!net_eq(net, &init_net)) {
+ ipvs->lblcr_ctl_table = kmemdup(vs_vars_table,
+ sizeof(vs_vars_table),
+ GFP_KERNEL);
+ if (ipvs->lblcr_ctl_table == NULL)
+ goto err_dup;
+ } else
+ ipvs->lblcr_ctl_table = vs_vars_table;
+ ipvs->sysctl_lblcr_expiration = 24*60*60*HZ;
+ ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration;
+
+ ipvs->lblcr_ctl_header =
+ register_net_sysctl_table(net, net_vs_ctl_path,
+ ipvs->lblcr_ctl_table);
+ if (!ipvs->lblcr_ctl_header)
+ goto err_reg;
+
+ return 0;
+
+err_reg:
+ if (!net_eq(net, &init_net))
+ kfree(ipvs->lblcr_ctl_table);
+
+err_dup:
+ return -ENOMEM;
+}
+
+static void __net_exit __ip_vs_lblcr_exit(struct net *net)
+{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ unregister_net_sysctl_table(ipvs->lblcr_ctl_header);
+
+ if (!net_eq(net, &init_net))
+ kfree(ipvs->lblcr_ctl_table);
+}
+
+static struct pernet_operations ip_vs_lblcr_ops = {
+ .init = __ip_vs_lblcr_init,
+ .exit = __ip_vs_lblcr_exit,
+};
static int __init ip_vs_lblcr_init(void)
{
int ret;
- sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
+ ret = register_pernet_subsys(&ip_vs_lblcr_ops);
+ if (ret)
+ return ret;
+
ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
if (ret)
- unregister_sysctl_table(sysctl_header);
+ unregister_pernet_subsys(&ip_vs_lblcr_ops);
return ret;
}
-
static void __exit ip_vs_lblcr_cleanup(void)
{
- unregister_sysctl_table(sysctl_header);
unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
+ unregister_pernet_subsys(&ip_vs_lblcr_ops);
}
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
index 4680647..f454c80 100644
--- a/net/netfilter/ipvs/ip_vs_nfct.c
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -141,6 +141,7 @@
struct nf_conntrack_tuple *orig, new_reply;
struct ip_vs_conn *cp;
struct ip_vs_conn_param p;
+ struct net *net = nf_ct_net(ct);
if (exp->tuple.src.l3num != PF_INET)
return;
@@ -155,7 +156,7 @@
/* RS->CLIENT */
orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
- ip_vs_conn_fill_param(exp->tuple.src.l3num, orig->dst.protonum,
+ ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum,
&orig->src.u3, orig->src.u.tcp.port,
&orig->dst.u3, orig->dst.u.tcp.port, &p);
cp = ip_vs_conn_out_get(&p);
@@ -268,7 +269,8 @@
" for conn " FMT_CONN "\n",
__func__, ARG_TUPLE(&tuple), ARG_CONN(cp));
- h = nf_conntrack_find_get(&init_net, NF_CT_DEFAULT_ZONE, &tuple);
+ h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE,
+ &tuple);
if (h) {
ct = nf_ct_tuplehash_to_ctrack(h);
/* Show what happens instead of calling nf_ct_kill() */
diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c
index 3414af7..5cf859c 100644
--- a/net/netfilter/ipvs/ip_vs_pe.c
+++ b/net/netfilter/ipvs/ip_vs_pe.c
@@ -29,12 +29,11 @@
}
/* Get pe in the pe list by name */
-static struct ip_vs_pe *
-ip_vs_pe_getbyname(const char *pe_name)
+struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)
{
struct ip_vs_pe *pe;
- IP_VS_DBG(2, "%s(): pe_name \"%s\"\n", __func__,
+ IP_VS_DBG(10, "%s(): pe_name \"%s\"\n", __func__,
pe_name);
spin_lock_bh(&ip_vs_pe_lock);
@@ -60,28 +59,22 @@
}
/* Lookup pe and try to load it if it doesn't exist */
-struct ip_vs_pe *ip_vs_pe_get(const char *name)
+struct ip_vs_pe *ip_vs_pe_getbyname(const char *name)
{
struct ip_vs_pe *pe;
/* Search for the pe by name */
- pe = ip_vs_pe_getbyname(name);
+ pe = __ip_vs_pe_getbyname(name);
/* If pe not found, load the module and search again */
if (!pe) {
request_module("ip_vs_pe_%s", name);
- pe = ip_vs_pe_getbyname(name);
+ pe = __ip_vs_pe_getbyname(name);
}
return pe;
}
-void ip_vs_pe_put(struct ip_vs_pe *pe)
-{
- if (pe && pe->module)
- module_put(pe->module);
-}
-
/* Register a pe in the pe list */
int register_ip_vs_pe(struct ip_vs_pe *pe)
{
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index b8b4e96..0d83bc0 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -71,6 +71,7 @@
struct ip_vs_iphdr iph;
unsigned int dataoff, datalen, matchoff, matchlen;
const char *dptr;
+ int retc;
ip_vs_fill_iphdr(p->af, skb_network_header(skb), &iph);
@@ -83,6 +84,8 @@
if (dataoff >= skb->len)
return -EINVAL;
+ if ((retc = skb_linearize(skb)) < 0)
+ return retc;
dptr = skb->data + dataoff;
datalen = skb->len - dataoff;
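
The pe_sip fix above exists because skb payload may sit in paged fragments: skb->data only covers the linear head, so the helper has to linearize before walking the SIP body by pointer. A sketch of the idiom, with parse_payload as an illustrative name:

#include <linux/errno.h>
#include <linux/skbuff.h>

/* Sketch: parse a payload starting 'dataoff' bytes into the packet.
 * skb->data only covers the linear head, so collapse any paged
 * fragments into it before dereferencing. */
static int parse_payload(struct sk_buff *skb, unsigned int dataoff)
{
	const char *dptr;
	unsigned int datalen;
	int retc;

	if (dataoff >= skb->len)
		return -EINVAL;

	/* returns 0 on success, -ENOMEM if reallocation fails */
	retc = skb_linearize(skb);
	if (retc < 0)
		return retc;

	dptr = skb->data + dataoff;
	datalen = skb->len - dataoff;

	/* ... dptr[0..datalen) is now contiguous and safe to walk ... */
	(void)dptr;
	(void)datalen;
	return 0;
}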
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index c539983..6ac986c 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -60,6 +60,31 @@
return 0;
}
+/*
+ * register an ipvs protocols netns related data
+ */
+static int
+register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp)
+{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ unsigned hash = IP_VS_PROTO_HASH(pp->protocol);
+ struct ip_vs_proto_data *pd =
+ kzalloc(sizeof(struct ip_vs_proto_data), GFP_ATOMIC);
+
+ if (!pd) {
+ pr_err("%s(): no memory.\n", __func__);
+ return -ENOMEM;
+ }
+ pd->pp = pp; /* cache the protocol pointer for fast access */
+ pd->next = ipvs->proto_data_table[hash];
+ ipvs->proto_data_table[hash] = pd;
+ atomic_set(&pd->appcnt, 0); /* Init app counter */
+
+ if (pp->init_netns != NULL)
+ pp->init_netns(net, pd);
+
+ return 0;
+}
/*
* unregister an ipvs protocol
@@ -82,6 +107,29 @@
return -ESRCH;
}
+/*
+ * unregister an ipvs protocols netns data
+ */
+static int
+unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd)
+{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ struct ip_vs_proto_data **pd_p;
+ unsigned hash = IP_VS_PROTO_HASH(pd->pp->protocol);
+
+ pd_p = &ipvs->proto_data_table[hash];
+ for (; *pd_p; pd_p = &(*pd_p)->next) {
+ if (*pd_p == pd) {
+ *pd_p = pd->next;
+ if (pd->pp->exit_netns != NULL)
+ pd->pp->exit_netns(net, pd);
+ kfree(pd);
+ return 0;
+ }
+ }
+
+ return -ESRCH;
+}
/*
* get ip_vs_protocol object by its proto.
@@ -100,19 +148,44 @@
}
EXPORT_SYMBOL(ip_vs_proto_get);
+/*
+ * get ip_vs_protocol object data by netns and proto
+ */
+struct ip_vs_proto_data *
+__ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto)
+{
+ struct ip_vs_proto_data *pd;
+ unsigned hash = IP_VS_PROTO_HASH(proto);
+
+ for (pd = ipvs->proto_data_table[hash]; pd; pd = pd->next) {
+ if (pd->pp->protocol == proto)
+ return pd;
+ }
+
+ return NULL;
+}
+
+struct ip_vs_proto_data *
+ip_vs_proto_data_get(struct net *net, unsigned short proto)
+{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ return __ipvs_proto_data_get(ipvs, proto);
+}
+EXPORT_SYMBOL(ip_vs_proto_data_get);
/*
* Propagate event for state change to all protocols
*/
-void ip_vs_protocol_timeout_change(int flags)
+void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags)
{
- struct ip_vs_protocol *pp;
+ struct ip_vs_proto_data *pd;
int i;
for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
- for (pp = ip_vs_proto_table[i]; pp; pp = pp->next) {
- if (pp->timeout_change)
- pp->timeout_change(pp, flags);
+ for (pd = ipvs->proto_data_table[i]; pd; pd = pd->next) {
+ if (pd->pp->timeout_change)
+ pd->pp->timeout_change(pd, flags);
}
}
}
@@ -236,6 +309,46 @@
ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg);
}
+/*
+ * per network name-space init
+ */
+static int __net_init __ip_vs_protocol_init(struct net *net)
+{
+#ifdef CONFIG_IP_VS_PROTO_TCP
+ register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_UDP
+ register_ip_vs_proto_netns(net, &ip_vs_protocol_udp);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_SCTP
+ register_ip_vs_proto_netns(net, &ip_vs_protocol_sctp);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_AH
+ register_ip_vs_proto_netns(net, &ip_vs_protocol_ah);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_ESP
+ register_ip_vs_proto_netns(net, &ip_vs_protocol_esp);
+#endif
+ return 0;
+}
+
+static void __net_exit __ip_vs_protocol_cleanup(struct net *net)
+{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ struct ip_vs_proto_data *pd;
+ int i;
+
+ /* unregister all the ipvs proto data for this netns */
+ for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
+ while ((pd = ipvs->proto_data_table[i]) != NULL)
+ unregister_ip_vs_proto_netns(net, pd);
+ }
+}
+
+static struct pernet_operations ipvs_proto_ops = {
+ .init = __ip_vs_protocol_init,
+ .exit = __ip_vs_protocol_cleanup,
+};
int __init ip_vs_protocol_init(void)
{
@@ -265,6 +378,7 @@
REGISTER_PROTOCOL(&ip_vs_protocol_esp);
#endif
pr_info("Registered protocols (%s)\n", &protocols[2]);
- return 0;
+ return register_pernet_subsys(&ipvs_proto_ops);
}
@@ -275,6 +389,7 @@
struct ip_vs_protocol *pp;
int i;
+ unregister_pernet_subsys(&ipvs_proto_ops);
/* unregister all the ipvs protocols */
for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
while ((pp = ip_vs_proto_table[i]) != NULL)
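
Per-netns protocol state above is a small chained hash keyed by IP protocol number: registration pushes at the head of a bucket's singly linked chain, lookup walks it, and unregistration unlinks through a pointer-to-pointer scan (returning the -ESRCH analogue on a miss). A self-contained user-space sketch of the same structure, names illustrative:

#include <stdlib.h>

#define PROTO_TAB_SIZE	32
#define proto_hash(p)	((p) & (PROTO_TAB_SIZE - 1))

struct proto_data {
	struct proto_data *next;	/* chain within one hash bucket */
	unsigned short protocol;	/* e.g. IPPROTO_TCP */
	/* per-netns payload would live here (timeout table, appcnt...) */
};

struct proto_table {
	struct proto_data *bucket[PROTO_TAB_SIZE];
};

static int proto_register(struct proto_table *t, unsigned short proto)
{
	unsigned h = proto_hash(proto);
	struct proto_data *pd = calloc(1, sizeof(*pd));

	if (!pd)
		return -1;
	pd->protocol = proto;
	pd->next = t->bucket[h];	/* push at chain head */
	t->bucket[h] = pd;
	return 0;
}

static struct proto_data *proto_lookup(struct proto_table *t,
				       unsigned short proto)
{
	struct proto_data *pd;

	for (pd = t->bucket[proto_hash(proto)]; pd; pd = pd->next)
		if (pd->protocol == proto)
			return pd;
	return NULL;
}

static int proto_unregister(struct proto_table *t, struct proto_data *pd)
{
	struct proto_data **pp = &t->bucket[proto_hash(pd->protocol)];

	for (; *pp; pp = &(*pp)->next) {
		if (*pp == pd) {	/* unlink via pointer-to-pointer */
			*pp = pd->next;
			free(pd);
			return 0;
		}
	}
	return -1;	/* not found, mirrors -ESRCH above */
}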
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
index 3a04611..5b8eb8b 100644
--- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -41,28 +41,30 @@
#define PORT_ISAKMP 500
static void
-ah_esp_conn_fill_param_proto(int af, const struct ip_vs_iphdr *iph,
- int inverse, struct ip_vs_conn_param *p)
+ah_esp_conn_fill_param_proto(struct net *net, int af,
+ const struct ip_vs_iphdr *iph, int inverse,
+ struct ip_vs_conn_param *p)
{
if (likely(!inverse))
- ip_vs_conn_fill_param(af, IPPROTO_UDP,
+ ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
&iph->saddr, htons(PORT_ISAKMP),
&iph->daddr, htons(PORT_ISAKMP), p);
else
- ip_vs_conn_fill_param(af, IPPROTO_UDP,
+ ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
&iph->daddr, htons(PORT_ISAKMP),
&iph->saddr, htons(PORT_ISAKMP), p);
}
static struct ip_vs_conn *
-ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+ah_esp_conn_in_get(int af, const struct sk_buff *skb,
const struct ip_vs_iphdr *iph, unsigned int proto_off,
int inverse)
{
struct ip_vs_conn *cp;
struct ip_vs_conn_param p;
+ struct net *net = skb_net(skb);
- ah_esp_conn_fill_param_proto(af, iph, inverse, &p);
+ ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
cp = ip_vs_conn_in_get(&p);
if (!cp) {
/*
@@ -72,7 +74,7 @@
IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet "
"%s%s %s->%s\n",
inverse ? "ICMP+" : "",
- pp->name,
+ ip_vs_proto_get(iph->protocol)->name,
IP_VS_DBG_ADDR(af, &iph->saddr),
IP_VS_DBG_ADDR(af, &iph->daddr));
}
@@ -83,21 +85,21 @@
static struct ip_vs_conn *
ah_esp_conn_out_get(int af, const struct sk_buff *skb,
- struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph,
unsigned int proto_off,
int inverse)
{
struct ip_vs_conn *cp;
struct ip_vs_conn_param p;
+ struct net *net = skb_net(skb);
- ah_esp_conn_fill_param_proto(af, iph, inverse, &p);
+ ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
cp = ip_vs_conn_out_get(&p);
if (!cp) {
IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "
"%s%s %s->%s\n",
inverse ? "ICMP+" : "",
- pp->name,
+ ip_vs_proto_get(iph->protocol)->name,
IP_VS_DBG_ADDR(af, &iph->saddr),
IP_VS_DBG_ADDR(af, &iph->daddr));
}
@@ -107,7 +109,7 @@
static int
-ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp)
{
/*
@@ -117,26 +119,14 @@
return 0;
}
-static void ah_esp_init(struct ip_vs_protocol *pp)
-{
- /* nothing to do now */
-}
-
-
-static void ah_esp_exit(struct ip_vs_protocol *pp)
-{
- /* nothing to do now */
-}
-
-
#ifdef CONFIG_IP_VS_PROTO_AH
struct ip_vs_protocol ip_vs_protocol_ah = {
.name = "AH",
.protocol = IPPROTO_AH,
.num_states = 1,
.dont_defrag = 1,
- .init = ah_esp_init,
- .exit = ah_esp_exit,
+ .init = NULL,
+ .exit = NULL,
.conn_schedule = ah_esp_conn_schedule,
.conn_in_get = ah_esp_conn_in_get,
.conn_out_get = ah_esp_conn_out_get,
@@ -149,7 +139,6 @@
.app_conn_bind = NULL,
.debug_packet = ip_vs_tcpudp_debug_packet,
.timeout_change = NULL, /* ISAKMP */
- .set_state_timeout = NULL,
};
#endif
@@ -159,8 +148,8 @@
.protocol = IPPROTO_ESP,
.num_states = 1,
.dont_defrag = 1,
- .init = ah_esp_init,
- .exit = ah_esp_exit,
+ .init = NULL,
+ .exit = NULL,
.conn_schedule = ah_esp_conn_schedule,
.conn_in_get = ah_esp_conn_in_get,
.conn_out_get = ah_esp_conn_out_get,
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 1ea96bcd..fb2d04a 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -9,9 +9,10 @@
#include <net/ip_vs.h>
static int
-sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp)
{
+ struct net *net;
struct ip_vs_service *svc;
sctp_chunkhdr_t _schunkh, *sch;
sctp_sctphdr_t *sh, _sctph;
@@ -27,13 +28,13 @@
sizeof(_schunkh), &_schunkh);
if (sch == NULL)
return 0;
-
+ net = skb_net(skb);
if ((sch->type == SCTP_CID_INIT) &&
- (svc = ip_vs_service_get(af, skb->mark, iph.protocol,
+ (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
&iph.daddr, sh->dest))) {
int ignored;
- if (ip_vs_todrop()) {
+ if (ip_vs_todrop(net_ipvs(net))) {
/*
* It seems that we are very loaded.
* We have to drop this packet :(
@@ -46,14 +47,19 @@
* Let the virtual server select a real server for the
* incoming connection, and create a connection entry.
*/
- *cpp = ip_vs_schedule(svc, skb, pp, &ignored);
- if (!*cpp && !ignored) {
- *verdict = ip_vs_leave(svc, skb, pp);
+ *cpp = ip_vs_schedule(svc, skb, pd, &ignored);
+ if (!*cpp && ignored <= 0) {
+ if (!ignored)
+ *verdict = ip_vs_leave(svc, skb, pd);
+ else {
+ ip_vs_service_put(svc);
+ *verdict = NF_DROP;
+ }
return 0;
}
ip_vs_service_put(svc);
}
-
+ /* NF_ACCEPT */
return 1;
}
@@ -856,7 +862,7 @@
/*
* Timeout table[state]
*/
-static int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
+static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
[IP_VS_SCTP_S_NONE] = 2 * HZ,
[IP_VS_SCTP_S_INIT_CLI] = 1 * 60 * HZ,
[IP_VS_SCTP_S_INIT_SER] = 1 * 60 * HZ,
@@ -900,20 +906,8 @@
return "?";
}
-static void sctp_timeout_change(struct ip_vs_protocol *pp, int flags)
-{
-}
-
-static int
-sctp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
-{
-
-return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_SCTP_S_LAST,
- sctp_state_name_table, sname, to);
-}
-
static inline int
-set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
+set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
int direction, const struct sk_buff *skb)
{
sctp_chunkhdr_t _sctpch, *sch;
@@ -971,7 +965,7 @@
IP_VS_DBG_BUF(8, "%s %s %s:%d->"
"%s:%d state: %s->%s conn->refcnt:%d\n",
- pp->name,
+ pd->pp->name,
((direction == IP_VS_DIR_OUTPUT) ?
"output " : "input "),
IP_VS_DBG_ADDR(cp->af, &cp->daddr),
@@ -995,75 +989,73 @@
}
}
}
+ if (likely(pd))
+ cp->timeout = pd->timeout_table[cp->state = next_state];
+ else /* What to do ? */
+ cp->timeout = sctp_timeouts[cp->state = next_state];
- cp->timeout = pp->timeout_table[cp->state = next_state];
-
- return 1;
+ return 1;
}
static int
sctp_state_transition(struct ip_vs_conn *cp, int direction,
- const struct sk_buff *skb, struct ip_vs_protocol *pp)
+ const struct sk_buff *skb, struct ip_vs_proto_data *pd)
{
int ret = 0;
spin_lock(&cp->lock);
- ret = set_sctp_state(pp, cp, direction, skb);
+ ret = set_sctp_state(pd, cp, direction, skb);
spin_unlock(&cp->lock);
return ret;
}
-/*
- * Hash table for SCTP application incarnations
- */
-#define SCTP_APP_TAB_BITS 4
-#define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS)
-#define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1)
-
-static struct list_head sctp_apps[SCTP_APP_TAB_SIZE];
-static DEFINE_SPINLOCK(sctp_app_lock);
-
static inline __u16 sctp_app_hashkey(__be16 port)
{
return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port)
& SCTP_APP_TAB_MASK;
}
-static int sctp_register_app(struct ip_vs_app *inc)
+static int sctp_register_app(struct net *net, struct ip_vs_app *inc)
{
struct ip_vs_app *i;
__u16 hash;
__be16 port = inc->port;
int ret = 0;
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
hash = sctp_app_hashkey(port);
- spin_lock_bh(&sctp_app_lock);
- list_for_each_entry(i, &sctp_apps[hash], p_list) {
+ spin_lock_bh(&ipvs->sctp_app_lock);
+ list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) {
if (i->port == port) {
ret = -EEXIST;
goto out;
}
}
- list_add(&inc->p_list, &sctp_apps[hash]);
- atomic_inc(&ip_vs_protocol_sctp.appcnt);
+ list_add(&inc->p_list, &ipvs->sctp_apps[hash]);
+ atomic_inc(&pd->appcnt);
out:
- spin_unlock_bh(&sctp_app_lock);
+ spin_unlock_bh(&ipvs->sctp_app_lock);
return ret;
}
-static void sctp_unregister_app(struct ip_vs_app *inc)
+static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)
{
- spin_lock_bh(&sctp_app_lock);
- atomic_dec(&ip_vs_protocol_sctp.appcnt);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
+
+ spin_lock_bh(&ipvs->sctp_app_lock);
+ atomic_dec(&pd->appcnt);
list_del(&inc->p_list);
- spin_unlock_bh(&sctp_app_lock);
+ spin_unlock_bh(&ipvs->sctp_app_lock);
}
static int sctp_app_conn_bind(struct ip_vs_conn *cp)
{
+ struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
int hash;
struct ip_vs_app *inc;
int result = 0;
@@ -1074,12 +1066,12 @@
/* Lookup application incarnations and bind the right one */
hash = sctp_app_hashkey(cp->vport);
- spin_lock(&sctp_app_lock);
- list_for_each_entry(inc, &sctp_apps[hash], p_list) {
+ spin_lock(&ipvs->sctp_app_lock);
+ list_for_each_entry(inc, &ipvs->sctp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- spin_unlock(&sctp_app_lock);
+ spin_unlock(&ipvs->sctp_app_lock);
IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
@@ -1095,43 +1087,50 @@
goto out;
}
}
- spin_unlock(&sctp_app_lock);
+ spin_unlock(&ipvs->sctp_app_lock);
out:
return result;
}
-static void ip_vs_sctp_init(struct ip_vs_protocol *pp)
+/* ---------------------------------------------
+ * timeouts are netns related now.
+ * ---------------------------------------------
+ */
+static void __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)
{
- IP_VS_INIT_HASH_TABLE(sctp_apps);
- pp->timeout_table = sctp_timeouts;
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE);
+ spin_lock_init(&ipvs->sctp_app_lock);
+ pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,
+ sizeof(sctp_timeouts));
}
-
-static void ip_vs_sctp_exit(struct ip_vs_protocol *pp)
+static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd)
{
-
+ kfree(pd->timeout_table);
}
struct ip_vs_protocol ip_vs_protocol_sctp = {
- .name = "SCTP",
- .protocol = IPPROTO_SCTP,
- .num_states = IP_VS_SCTP_S_LAST,
- .dont_defrag = 0,
- .appcnt = ATOMIC_INIT(0),
- .init = ip_vs_sctp_init,
- .exit = ip_vs_sctp_exit,
- .register_app = sctp_register_app,
+ .name = "SCTP",
+ .protocol = IPPROTO_SCTP,
+ .num_states = IP_VS_SCTP_S_LAST,
+ .dont_defrag = 0,
+ .init = NULL,
+ .exit = NULL,
+ .init_netns = __ip_vs_sctp_init,
+ .exit_netns = __ip_vs_sctp_exit,
+ .register_app = sctp_register_app,
.unregister_app = sctp_unregister_app,
- .conn_schedule = sctp_conn_schedule,
- .conn_in_get = ip_vs_conn_in_get_proto,
- .conn_out_get = ip_vs_conn_out_get_proto,
- .snat_handler = sctp_snat_handler,
- .dnat_handler = sctp_dnat_handler,
- .csum_check = sctp_csum_check,
- .state_name = sctp_state_name,
+ .conn_schedule = sctp_conn_schedule,
+ .conn_in_get = ip_vs_conn_in_get_proto,
+ .conn_out_get = ip_vs_conn_out_get_proto,
+ .snat_handler = sctp_snat_handler,
+ .dnat_handler = sctp_dnat_handler,
+ .csum_check = sctp_csum_check,
+ .state_name = sctp_state_name,
.state_transition = sctp_state_transition,
- .app_conn_bind = sctp_app_conn_bind,
- .debug_packet = ip_vs_tcpudp_debug_packet,
- .timeout_change = sctp_timeout_change,
- .set_state_timeout = sctp_set_state_timeout,
+ .app_conn_bind = sctp_app_conn_bind,
+ .debug_packet = ip_vs_tcpudp_debug_packet,
+ .timeout_change = NULL,
};
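
As in the tcp and udp conversions that follow, each netns receives its own writable copy of the const timeout template through ip_vs_create_timeout_table(), and exit_netns frees it again. That helper is defined elsewhere in this series, so the body below is an assumption about what it plausibly does, not a quote of it:

#include <linux/slab.h>

/* Assumed body of ip_vs_create_timeout_table(): duplicate the const
 * template so every netns owns a writable copy, and a sysctl write
 * in one namespace cannot disturb the timeouts of another. */
static int *create_timeout_table_sketch(const int *table, int size)
{
	return kmemdup(table, size, GFP_ATOMIC);
}

Per-netns init then assigns the copy to pd->timeout_table and exit_netns reverses it with kfree(pd->timeout_table), matching the hunks above.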
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index f6c5200..c0cc341 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -9,8 +9,12 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
- * Changes:
+ * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com>
*
+ * Network name space (netns) aware.
+ * Global data moved to netns, i.e. struct netns_ipvs.
+ * The tcp_timeouts table now has a per-netns copy, kept in the
+ * per-protocol ip_vs_proto_data hash table and managed per netns.
*/
#define KMSG_COMPONENT "IPVS"
@@ -28,9 +32,10 @@
#include <net/ip_vs.h>
static int
-tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp)
{
+ struct net *net;
struct ip_vs_service *svc;
struct tcphdr _tcph, *th;
struct ip_vs_iphdr iph;
@@ -42,14 +47,14 @@
*verdict = NF_DROP;
return 0;
}
-
+ net = skb_net(skb);
/* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
if (th->syn &&
- (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr,
- th->dest))) {
+ (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
+ &iph.daddr, th->dest))) {
int ignored;
- if (ip_vs_todrop()) {
+ if (ip_vs_todrop(net_ipvs(net))) {
/*
* It seems that we are very loaded.
* We have to drop this packet :(
@@ -63,13 +68,19 @@
* Let the virtual server select a real server for the
* incoming connection, and create a connection entry.
*/
- *cpp = ip_vs_schedule(svc, skb, pp, &ignored);
- if (!*cpp && !ignored) {
- *verdict = ip_vs_leave(svc, skb, pp);
+ *cpp = ip_vs_schedule(svc, skb, pd, &ignored);
+ if (!*cpp && ignored <= 0) {
+ if (!ignored)
+ *verdict = ip_vs_leave(svc, skb, pd);
+ else {
+ ip_vs_service_put(svc);
+ *verdict = NF_DROP;
+ }
return 0;
}
ip_vs_service_put(svc);
}
+ /* NF_ACCEPT */
return 1;
}
@@ -338,7 +349,7 @@
/*
* Timeout table[state]
*/
-static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
+static const int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
[IP_VS_TCP_S_NONE] = 2*HZ,
[IP_VS_TCP_S_ESTABLISHED] = 15*60*HZ,
[IP_VS_TCP_S_SYN_SENT] = 2*60*HZ,
@@ -437,10 +448,7 @@
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
};
-static struct tcp_states_t *tcp_state_table = tcp_states;
-
-
-static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
+static void tcp_timeout_change(struct ip_vs_proto_data *pd, int flags)
{
int on = (flags & 1); /* secure_tcp */
@@ -450,14 +458,7 @@
** for most if not for all of the applications. Something
** like "capabilities" (flags) for each object.
*/
- tcp_state_table = (on? tcp_states_dos : tcp_states);
-}
-
-static int
-tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
-{
- return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST,
- tcp_state_name_table, sname, to);
+ pd->tcp_state_table = (on ? tcp_states_dos : tcp_states);
}
static inline int tcp_state_idx(struct tcphdr *th)
@@ -474,7 +475,7 @@
}
static inline void
-set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
+set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
int direction, struct tcphdr *th)
{
int state_idx;
@@ -497,7 +498,8 @@
goto tcp_state_out;
}
- new_state = tcp_state_table[state_off+state_idx].next_state[cp->state];
+ new_state =
+ pd->tcp_state_table[state_off+state_idx].next_state[cp->state];
tcp_state_out:
if (new_state != cp->state) {
@@ -505,7 +507,7 @@
IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->"
"%s:%d state: %s->%s conn->refcnt:%d\n",
- pp->name,
+ pd->pp->name,
((state_off == TCP_DIR_OUTPUT) ?
"output " : "input "),
th->syn ? 'S' : '.',
@@ -535,17 +537,19 @@
}
}
- cp->timeout = pp->timeout_table[cp->state = new_state];
+ if (likely(pd))
+ cp->timeout = pd->timeout_table[cp->state = new_state];
+ else /* What to do ? */
+ cp->timeout = tcp_timeouts[cp->state = new_state];
}
-
/*
* Handle state transitions
*/
static int
tcp_state_transition(struct ip_vs_conn *cp, int direction,
const struct sk_buff *skb,
- struct ip_vs_protocol *pp)
+ struct ip_vs_proto_data *pd)
{
struct tcphdr _tcph, *th;
@@ -560,23 +564,12 @@
return 0;
spin_lock(&cp->lock);
- set_tcp_state(pp, cp, direction, th);
+ set_tcp_state(pd, cp, direction, th);
spin_unlock(&cp->lock);
return 1;
}
-
-/*
- * Hash table for TCP application incarnations
- */
-#define TCP_APP_TAB_BITS 4
-#define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS)
-#define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1)
-
-static struct list_head tcp_apps[TCP_APP_TAB_SIZE];
-static DEFINE_SPINLOCK(tcp_app_lock);
-
static inline __u16 tcp_app_hashkey(__be16 port)
{
return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port)
@@ -584,44 +577,50 @@
}
-static int tcp_register_app(struct ip_vs_app *inc)
+static int tcp_register_app(struct net *net, struct ip_vs_app *inc)
{
struct ip_vs_app *i;
__u16 hash;
__be16 port = inc->port;
int ret = 0;
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
hash = tcp_app_hashkey(port);
- spin_lock_bh(&tcp_app_lock);
- list_for_each_entry(i, &tcp_apps[hash], p_list) {
+ spin_lock_bh(&ipvs->tcp_app_lock);
+ list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) {
if (i->port == port) {
ret = -EEXIST;
goto out;
}
}
- list_add(&inc->p_list, &tcp_apps[hash]);
- atomic_inc(&ip_vs_protocol_tcp.appcnt);
+ list_add(&inc->p_list, &ipvs->tcp_apps[hash]);
+ atomic_inc(&pd->appcnt);
out:
- spin_unlock_bh(&tcp_app_lock);
+ spin_unlock_bh(&ipvs->tcp_app_lock);
return ret;
}
static void
-tcp_unregister_app(struct ip_vs_app *inc)
+tcp_unregister_app(struct net *net, struct ip_vs_app *inc)
{
- spin_lock_bh(&tcp_app_lock);
- atomic_dec(&ip_vs_protocol_tcp.appcnt);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+
+ spin_lock_bh(&ipvs->tcp_app_lock);
+ atomic_dec(&pd->appcnt);
list_del(&inc->p_list);
- spin_unlock_bh(&tcp_app_lock);
+ spin_unlock_bh(&ipvs->tcp_app_lock);
}
static int
tcp_app_conn_bind(struct ip_vs_conn *cp)
{
+ struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
int hash;
struct ip_vs_app *inc;
int result = 0;
@@ -633,12 +632,12 @@
/* Lookup application incarnations and bind the right one */
hash = tcp_app_hashkey(cp->vport);
- spin_lock(&tcp_app_lock);
- list_for_each_entry(inc, &tcp_apps[hash], p_list) {
+ spin_lock(&ipvs->tcp_app_lock);
+ list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- spin_unlock(&tcp_app_lock);
+ spin_unlock(&ipvs->tcp_app_lock);
IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
@@ -655,7 +654,7 @@
goto out;
}
}
- spin_unlock(&tcp_app_lock);
+ spin_unlock(&ipvs->tcp_app_lock);
out:
return result;
@@ -665,24 +664,35 @@
/*
* Set LISTEN timeout. (ip_vs_conn_put will setup timer)
*/
-void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
+void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp)
{
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+
spin_lock(&cp->lock);
cp->state = IP_VS_TCP_S_LISTEN;
- cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN];
+ cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN]
+ : tcp_timeouts[IP_VS_TCP_S_LISTEN]);
spin_unlock(&cp->lock);
}
-
-static void ip_vs_tcp_init(struct ip_vs_protocol *pp)
+/* ---------------------------------------------
+ * timeouts are netns related now.
+ * ---------------------------------------------
+ */
+static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
{
- IP_VS_INIT_HASH_TABLE(tcp_apps);
- pp->timeout_table = tcp_timeouts;
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE);
+ spin_lock_init(&ipvs->tcp_app_lock);
+ pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts,
+ sizeof(tcp_timeouts));
+ pd->tcp_state_table = tcp_states;
}
-
-static void ip_vs_tcp_exit(struct ip_vs_protocol *pp)
+static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd)
{
+ kfree(pd->timeout_table);
}
@@ -691,9 +701,10 @@
.protocol = IPPROTO_TCP,
.num_states = IP_VS_TCP_S_LAST,
.dont_defrag = 0,
- .appcnt = ATOMIC_INIT(0),
- .init = ip_vs_tcp_init,
- .exit = ip_vs_tcp_exit,
+ .init = NULL,
+ .exit = NULL,
+ .init_netns = __ip_vs_tcp_init,
+ .exit_netns = __ip_vs_tcp_exit,
.register_app = tcp_register_app,
.unregister_app = tcp_unregister_app,
.conn_schedule = tcp_conn_schedule,
@@ -707,5 +718,4 @@
.app_conn_bind = tcp_app_conn_bind,
.debug_packet = ip_vs_tcpudp_debug_packet,
.timeout_change = tcp_timeout_change,
- .set_state_timeout = tcp_set_state_timeout,
};
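
With tcp_state_table moved into ip_vs_proto_data, the secure_tcp defense can now swap the TCP state machine per namespace instead of through one file-scope pointer. A reduced sketch of that switch; the _sketch names mark everything here as illustrative, and the real tables hold the full FSM that is elided below:

/* Reduced model of the per-netns secure_tcp switch. */
struct tcp_states_row {
	int next_state[12];	/* next state per input, per current state */
};

static const struct tcp_states_row tcp_states_normal[4];	/* regular FSM */
static const struct tcp_states_row tcp_states_dos[4];		/* hardened FSM */

struct proto_data_sketch {
	const struct tcp_states_row *tcp_state_table;
};

static void timeout_change_sketch(struct proto_data_sketch *pd, int flags)
{
	int on = flags & 1;	/* secure_tcp sysctl bit */

	/* flip the FSM for this namespace only */
	pd->tcp_state_table = on ? tcp_states_dos : tcp_states_normal;
}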
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 9d106a0..f1282cb 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -9,7 +9,8 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
- * Changes:
+ * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com>
+ * Network name space (netns) aware.
*
*/
@@ -28,9 +29,10 @@
#include <net/ip6_checksum.h>
static int
-udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp)
{
+ struct net *net;
struct ip_vs_service *svc;
struct udphdr _udph, *uh;
struct ip_vs_iphdr iph;
@@ -42,13 +44,13 @@
*verdict = NF_DROP;
return 0;
}
-
- svc = ip_vs_service_get(af, skb->mark, iph.protocol,
+ net = skb_net(skb);
+ svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
&iph.daddr, uh->dest);
if (svc) {
int ignored;
- if (ip_vs_todrop()) {
+ if (ip_vs_todrop(net_ipvs(net))) {
/*
* It seems that we are very loaded.
* We have to drop this packet :(
@@ -62,13 +64,19 @@
* Let the virtual server select a real server for the
* incoming connection, and create a connection entry.
*/
- *cpp = ip_vs_schedule(svc, skb, pp, &ignored);
- if (!*cpp && !ignored) {
- *verdict = ip_vs_leave(svc, skb, pp);
+ *cpp = ip_vs_schedule(svc, skb, pd, &ignored);
+ if (!*cpp && ignored <= 0) {
+ if (!ignored)
+ *verdict = ip_vs_leave(svc, skb, pd);
+ else {
+ ip_vs_service_put(svc);
+ *verdict = NF_DROP;
+ }
return 0;
}
ip_vs_service_put(svc);
}
+ /* NF_ACCEPT */
return 1;
}
@@ -338,19 +346,6 @@
return 1;
}
-
-/*
- * Note: the caller guarantees that only one of register_app,
- * unregister_app or app_conn_bind is called each time.
- */
-
-#define UDP_APP_TAB_BITS 4
-#define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)
-#define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)
-
-static struct list_head udp_apps[UDP_APP_TAB_SIZE];
-static DEFINE_SPINLOCK(udp_app_lock);
-
static inline __u16 udp_app_hashkey(__be16 port)
{
return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port)
@@ -358,44 +353,50 @@
}
-static int udp_register_app(struct ip_vs_app *inc)
+static int udp_register_app(struct net *net, struct ip_vs_app *inc)
{
struct ip_vs_app *i;
__u16 hash;
__be16 port = inc->port;
int ret = 0;
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
hash = udp_app_hashkey(port);
- spin_lock_bh(&udp_app_lock);
- list_for_each_entry(i, &udp_apps[hash], p_list) {
+ spin_lock_bh(&ipvs->udp_app_lock);
+ list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) {
if (i->port == port) {
ret = -EEXIST;
goto out;
}
}
- list_add(&inc->p_list, &udp_apps[hash]);
- atomic_inc(&ip_vs_protocol_udp.appcnt);
+ list_add(&inc->p_list, &ipvs->udp_apps[hash]);
+ atomic_inc(&pd->appcnt);
out:
- spin_unlock_bh(&udp_app_lock);
+ spin_unlock_bh(&ipvs->udp_app_lock);
return ret;
}
static void
-udp_unregister_app(struct ip_vs_app *inc)
+udp_unregister_app(struct net *net, struct ip_vs_app *inc)
{
- spin_lock_bh(&udp_app_lock);
- atomic_dec(&ip_vs_protocol_udp.appcnt);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ spin_lock_bh(&ipvs->udp_app_lock);
+ atomic_dec(&pd->appcnt);
list_del(&inc->p_list);
- spin_unlock_bh(&udp_app_lock);
+ spin_unlock_bh(&ipvs->udp_app_lock);
}
static int udp_app_conn_bind(struct ip_vs_conn *cp)
{
+ struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
int hash;
struct ip_vs_app *inc;
int result = 0;
@@ -407,12 +408,12 @@
/* Lookup application incarnations and bind the right one */
hash = udp_app_hashkey(cp->vport);
- spin_lock(&udp_app_lock);
- list_for_each_entry(inc, &udp_apps[hash], p_list) {
+ spin_lock(&ipvs->udp_app_lock);
+ list_for_each_entry(inc, &ipvs->udp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- spin_unlock(&udp_app_lock);
+ spin_unlock(&ipvs->udp_app_lock);
IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
@@ -429,14 +430,14 @@
goto out;
}
}
- spin_unlock(&udp_app_lock);
+ spin_unlock(&ipvs->udp_app_lock);
out:
return result;
}
-static int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
+static const int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
[IP_VS_UDP_S_NORMAL] = 5*60*HZ,
[IP_VS_UDP_S_LAST] = 2*HZ,
};
@@ -446,14 +447,6 @@
[IP_VS_UDP_S_LAST] = "BUG!",
};
-
-static int
-udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
-{
- return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
- udp_state_name_table, sname, to);
-}
-
static const char * udp_state_name(int state)
{
if (state >= IP_VS_UDP_S_LAST)
@@ -464,20 +457,30 @@
static int
udp_state_transition(struct ip_vs_conn *cp, int direction,
const struct sk_buff *skb,
- struct ip_vs_protocol *pp)
+ struct ip_vs_proto_data *pd)
{
- cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];
+ if (unlikely(!pd)) {
+ pr_err("UDP no ns data\n");
+ return 0;
+ }
+
+ cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL];
return 1;
}
-static void udp_init(struct ip_vs_protocol *pp)
+static void __udp_init(struct net *net, struct ip_vs_proto_data *pd)
{
- IP_VS_INIT_HASH_TABLE(udp_apps);
- pp->timeout_table = udp_timeouts;
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE);
+ spin_lock_init(&ipvs->udp_app_lock);
+ pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts,
+ sizeof(udp_timeouts));
}
-static void udp_exit(struct ip_vs_protocol *pp)
+static void __udp_exit(struct net *net, struct ip_vs_proto_data *pd)
{
+ kfree(pd->timeout_table);
}
@@ -486,8 +489,10 @@
.protocol = IPPROTO_UDP,
.num_states = IP_VS_UDP_S_LAST,
.dont_defrag = 0,
- .init = udp_init,
- .exit = udp_exit,
+ .init = NULL,
+ .exit = NULL,
+ .init_netns = __udp_init,
+ .exit_netns = __udp_exit,
.conn_schedule = udp_conn_schedule,
.conn_in_get = ip_vs_conn_in_get_proto,
.conn_out_get = ip_vs_conn_out_get_proto,
@@ -501,5 +506,4 @@
.app_conn_bind = udp_app_conn_bind,
.debug_packet = ip_vs_tcpudp_debug_packet,
.timeout_change = NULL,
- .set_state_timeout = udp_set_state_timeout,
};
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index ab85aed..d1adf98 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -5,6 +5,18 @@
* high-performance and highly available server based on a
* cluster of servers.
*
+ * Version 1 is capable of handling both version 0 and version 1 messages.
+ * Version 0 is the plain old format.
+ * Note that version 0 receivers will just drop version 1 messages.
+ * Version 1 can handle IPv6, persistence data,
+ * timeouts, and firewall marks.
+ * In version 1, "ip_vs_sync_conn_options" is sent in network order.
+ * Version 0 can be turned on by sysctl -w net.ipv4.vs.sync_version=0
+ *
+ * Definitions: a Message is a complete datagram,
+ * a Sync_conn is a part of a Message,
+ * and Param Data is an option to a Sync_conn.
+ *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
*
* ip_vs_sync: sync connection info from master load balancer to backups
@@ -15,6 +27,8 @@
* Alexandre Cassen : Added SyncID support for incoming sync
* messages filtering.
* Justin Ossevoort : Fix endian problem on sync message size.
+ * Hans Schillstrom : Added Version 1: i.e. IPv6,
+ * Persistence support, fwmark and time-out.
*/
#define KMSG_COMPONENT "IPVS"
@@ -35,6 +49,8 @@
#include <linux/wait.h>
#include <linux/kernel.h>
+#include <asm/unaligned.h> /* Used for ntoh_seq and hton_seq */
+
#include <net/ip.h>
#include <net/sock.h>
@@ -43,11 +59,13 @@
#define IP_VS_SYNC_GROUP 0xe0000051 /* multicast addr - 224.0.0.81 */
#define IP_VS_SYNC_PORT 8848 /* multicast port */
+#define SYNC_PROTO_VER 1 /* Protocol version in header */
/*
* IPVS sync connection entry
+ * Version 0, i.e. original version.
*/
-struct ip_vs_sync_conn {
+struct ip_vs_sync_conn_v0 {
__u8 reserved;
/* Protocol, addresses and port numbers */
@@ -71,41 +89,159 @@
struct ip_vs_seq out_seq; /* outgoing seq. struct */
};
-struct ip_vs_sync_thread_data {
- struct socket *sock;
- char *buf;
-};
-
-#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn))
-#define FULL_CONN_SIZE \
-(sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options))
-
-
/*
- The master mulitcasts messages to the backup load balancers in the
- following format.
+ Sync Connection format (sync_conn)
0 1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Count Conns | SyncID | Size |
+ | Type | Protocol | Ver. | Size |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Flags |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | State | cport |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | vport | dport |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | fwmark |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | timeout (in sec.) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | ... |
+ | IP-Addresses (v4 or v6) |
+ | ... |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ Optional Parameters.
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Param. Type | Param. Length | Param. data |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
+ | ... |
+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | | Param Type | Param. Length |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Param data |
+ | Last Param data should be padded for 32 bit alignment |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*/
+
+/*
+ * Type 0, IPv4 sync connection format
+ */
+struct ip_vs_sync_v4 {
+ __u8 type;
+ __u8 protocol; /* Which protocol (TCP/UDP) */
+ __be16 ver_size; /* Version msb 4 bits */
+ /* Flags and state transition */
+ __be32 flags; /* status flags */
+ __be16 state; /* state info */
+ /* Protocol, addresses and port numbers */
+ __be16 cport;
+ __be16 vport;
+ __be16 dport;
+ __be32 fwmark; /* Firewall mark from skb */
+ __be32 timeout; /* cp timeout */
+ __be32 caddr; /* client address */
+ __be32 vaddr; /* virtual address */
+ __be32 daddr; /* destination address */
+ /* The sequence options start here */
+ /* PE data padded to 32bit alignment after seq. options */
+};
+/*
+ * Type 2 messages IPv6
+ */
+struct ip_vs_sync_v6 {
+ __u8 type;
+ __u8 protocol; /* Which protocol (TCP/UDP) */
+ __be16 ver_size; /* Version msb 4 bits */
+ /* Flags and state transition */
+ __be32 flags; /* status flags */
+ __be16 state; /* state info */
+ /* Protocol, addresses and port numbers */
+ __be16 cport;
+ __be16 vport;
+ __be16 dport;
+ __be32 fwmark; /* Firewall mark from skb */
+ __be32 timeout; /* cp timeout */
+ struct in6_addr caddr; /* client address */
+ struct in6_addr vaddr; /* virtual address */
+ struct in6_addr daddr; /* destination address */
+ /* The sequence options start here */
+ /* PE data padded to 32bit alignment after seq. options */
+};
+
+union ip_vs_sync_conn {
+ struct ip_vs_sync_v4 v4;
+ struct ip_vs_sync_v6 v6;
+};
+
+/* Bits in Type field in above */
+#define STYPE_INET6 0
+#define STYPE_F_INET6 (1 << STYPE_INET6)
+
+#define SVER_SHIFT 12 /* Shift to get version */
+#define SVER_MASK 0x0fff /* Mask to strip version */
+
+#define IPVS_OPT_SEQ_DATA 1
+#define IPVS_OPT_PE_DATA 2
+#define IPVS_OPT_PE_NAME 3
+#define IPVS_OPT_PARAM 7
+
+#define IPVS_OPT_F_SEQ_DATA (1 << (IPVS_OPT_SEQ_DATA-1))
+#define IPVS_OPT_F_PE_DATA (1 << (IPVS_OPT_PE_DATA-1))
+#define IPVS_OPT_F_PE_NAME (1 << (IPVS_OPT_PE_NAME-1))
+#define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1))
+
+struct ip_vs_sync_thread_data {
+ struct net *net;
+ struct socket *sock;
+ char *buf;
+};
+
+/* Version 0 definition of packet sizes */
+#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn_v0))
+#define FULL_CONN_SIZE \
+(sizeof(struct ip_vs_sync_conn_v0) + sizeof(struct ip_vs_sync_conn_options))
+
+
+/*
+ The master multicasts messages (datagrams) to the backup load balancers
+ in the following format.
+
+ Version 1:
+ Note: the first byte must be zero, so version 0 receivers will drop the packet.
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | 0 | SyncID | Size |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Count Conns | Version | Reserved, set to Zero |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| |
| IPVS Sync Connection (1) |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| . |
- | . |
+ ~ . ~
| . |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| |
| IPVS Sync Connection (n) |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ Version 0 Header
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Count Conns | SyncID | Size |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | IPVS Sync Connection (1) |
*/
#define SYNC_MESG_HEADER_LEN 4
#define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */
-struct ip_vs_sync_mesg {
+/* Version 0 header */
+struct ip_vs_sync_mesg_v0 {
__u8 nr_conns;
__u8 syncid;
__u16 size;
@@ -113,9 +249,16 @@
/* ip_vs_sync_conn entries start here */
};
-/* the maximum length of sync (sending/receiving) message */
-static int sync_send_mesg_maxlen;
-static int sync_recv_mesg_maxlen;
+/* Version 1 header */
+struct ip_vs_sync_mesg {
+ __u8 reserved; /* must be zero */
+ __u8 syncid;
+ __u16 size;
+ __u8 nr_conns;
+ __s8 version; /* SYNC_PROTO_VER */
+ __u16 spare;
+ /* ip_vs_sync_conn entries start here */
+};
struct ip_vs_sync_buff {
struct list_head list;
@@ -127,28 +270,6 @@
unsigned char *end;
};
-
-/* the sync_buff list head and the lock */
-static LIST_HEAD(ip_vs_sync_queue);
-static DEFINE_SPINLOCK(ip_vs_sync_lock);
-
-/* current sync_buff for accepting new conn entries */
-static struct ip_vs_sync_buff *curr_sb = NULL;
-static DEFINE_SPINLOCK(curr_sb_lock);
-
-/* ipvs sync daemon state */
-volatile int ip_vs_sync_state = IP_VS_STATE_NONE;
-volatile int ip_vs_master_syncid = 0;
-volatile int ip_vs_backup_syncid = 0;
-
-/* multicast interface name */
-char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-
-/* sync daemon tasks */
-static struct task_struct *sync_master_thread;
-static struct task_struct *sync_backup_thread;
-
/* multicast addr */
static struct sockaddr_in mcast_addr = {
.sin_family = AF_INET,
@@ -156,41 +277,71 @@
.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),
};
+/*
+ * Copy of struct ip_vs_seq
+ * From unaligned network order to aligned host order
+ */
+static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho)
+{
+ ho->init_seq = get_unaligned_be32(&no->init_seq);
+ ho->delta = get_unaligned_be32(&no->delta);
+ ho->previous_delta = get_unaligned_be32(&no->previous_delta);
+}
-static inline struct ip_vs_sync_buff *sb_dequeue(void)
+/*
+ * Copy of struct ip_vs_seq
+ * From Aligned host order to unaligned network order
+ */
+static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no)
+{
+ put_unaligned_be32(ho->init_seq, &no->init_seq);
+ put_unaligned_be32(ho->delta, &no->delta);
+ put_unaligned_be32(ho->previous_delta, &no->previous_delta);
+}
+
+static inline struct ip_vs_sync_buff *sb_dequeue(struct netns_ipvs *ipvs)
{
struct ip_vs_sync_buff *sb;
- spin_lock_bh(&ip_vs_sync_lock);
- if (list_empty(&ip_vs_sync_queue)) {
+ spin_lock_bh(&ipvs->sync_lock);
+ if (list_empty(&ipvs->sync_queue)) {
sb = NULL;
} else {
- sb = list_entry(ip_vs_sync_queue.next,
+ sb = list_entry(ipvs->sync_queue.next,
struct ip_vs_sync_buff,
list);
list_del(&sb->list);
}
- spin_unlock_bh(&ip_vs_sync_lock);
+ spin_unlock_bh(&ipvs->sync_lock);
return sb;
}
-static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void)
+/*
+ * Create a new sync buffer for Version 1 proto.
+ */
+static inline struct ip_vs_sync_buff *
+ip_vs_sync_buff_create(struct netns_ipvs *ipvs)
{
struct ip_vs_sync_buff *sb;
if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
return NULL;
- if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) {
+ sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
+ if (!sb->mesg) {
kfree(sb);
return NULL;
}
+ sb->mesg->reserved = 0; /* old nr_conns, i.e. must be zero now */
+ sb->mesg->version = SYNC_PROTO_VER;
+ sb->mesg->syncid = ipvs->master_syncid;
+ sb->mesg->size = sizeof(struct ip_vs_sync_mesg);
sb->mesg->nr_conns = 0;
- sb->mesg->syncid = ip_vs_master_syncid;
- sb->mesg->size = 4;
- sb->head = (unsigned char *)sb->mesg + 4;
- sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen;
+ sb->mesg->spare = 0;
+ sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg);
+ sb->end = (unsigned char *)sb->mesg + ipvs->send_mesg_maxlen;
+
sb->firstuse = jiffies;
return sb;
}
@@ -201,14 +352,16 @@
kfree(sb);
}
-static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
+static inline void sb_queue_tail(struct netns_ipvs *ipvs)
{
- spin_lock(&ip_vs_sync_lock);
- if (ip_vs_sync_state & IP_VS_STATE_MASTER)
- list_add_tail(&sb->list, &ip_vs_sync_queue);
+ struct ip_vs_sync_buff *sb = ipvs->sync_buff;
+
+ spin_lock(&ipvs->sync_lock);
+ if (ipvs->sync_state & IP_VS_STATE_MASTER)
+ list_add_tail(&sb->list, &ipvs->sync_queue);
else
ip_vs_sync_buff_release(sb);
- spin_unlock(&ip_vs_sync_lock);
+ spin_unlock(&ipvs->sync_lock);
}
/*
@@ -216,36 +369,101 @@
* than the specified time or the specified time is zero.
*/
static inline struct ip_vs_sync_buff *
-get_curr_sync_buff(unsigned long time)
+get_curr_sync_buff(struct netns_ipvs *ipvs, unsigned long time)
{
struct ip_vs_sync_buff *sb;
- spin_lock_bh(&curr_sb_lock);
- if (curr_sb && (time == 0 ||
- time_before(jiffies - curr_sb->firstuse, time))) {
- sb = curr_sb;
- curr_sb = NULL;
+ spin_lock_bh(&ipvs->sync_buff_lock);
+ if (ipvs->sync_buff && (time == 0 ||
+ time_before(jiffies - ipvs->sync_buff->firstuse, time))) {
+ sb = ipvs->sync_buff;
+ ipvs->sync_buff = NULL;
} else
sb = NULL;
- spin_unlock_bh(&curr_sb_lock);
+ spin_unlock_bh(&ipvs->sync_buff_lock);
return sb;
}
+/*
+ * Switch between sending version 0 and version 1 sync messages.
+ * - must deal with the current sync_buff
+ */
+void ip_vs_sync_switch_mode(struct net *net, int mode)
+{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ if (!(ipvs->sync_state & IP_VS_STATE_MASTER))
+ return;
+ if (mode == ipvs->sysctl_sync_ver || !ipvs->sync_buff)
+ return;
+
+ spin_lock_bh(&ipvs->sync_buff_lock);
+ /* Buffer empty? Then let buff_create do the job. */
+ if (ipvs->sync_buff->mesg->size <= sizeof(struct ip_vs_sync_mesg)) {
+ ip_vs_sync_buff_release(ipvs->sync_buff);
+ ipvs->sync_buff = NULL;
+ } else {
+ spin_lock_bh(&ipvs->sync_lock);
+ if (ipvs->sync_state & IP_VS_STATE_MASTER)
+ list_add_tail(&ipvs->sync_buff->list,
+ &ipvs->sync_queue);
+ else
+ ip_vs_sync_buff_release(ipvs->sync_buff);
+ spin_unlock_bh(&ipvs->sync_lock);
+ }
+ spin_unlock_bh(&ipvs->sync_buff_lock);
+}
/*
- * Add an ip_vs_conn information into the current sync_buff.
- * Called by ip_vs_in.
+ * Create a new sync buffer for Version 0 proto.
*/
-void ip_vs_sync_conn(struct ip_vs_conn *cp)
+static inline struct ip_vs_sync_buff *
+ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
{
- struct ip_vs_sync_mesg *m;
- struct ip_vs_sync_conn *s;
+ struct ip_vs_sync_buff *sb;
+ struct ip_vs_sync_mesg_v0 *mesg;
+
+ if (!(sb = kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
+ return NULL;
+
+ sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
+ if (!sb->mesg) {
+ kfree(sb);
+ return NULL;
+ }
+ mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg;
+ mesg->nr_conns = 0;
+ mesg->syncid = ipvs->master_syncid;
+ mesg->size = sizeof(struct ip_vs_sync_mesg_v0);
+ sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0);
+ sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen;
+ sb->firstuse = jiffies;
+ return sb;
+}
+
+/*
+ * Version 0, can be selected via sysctl.
+ * Add ip_vs_conn information into the current sync_buff.
+ */
+void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
+{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ struct ip_vs_sync_mesg_v0 *m;
+ struct ip_vs_sync_conn_v0 *s;
int len;
- spin_lock(&curr_sb_lock);
- if (!curr_sb) {
- if (!(curr_sb=ip_vs_sync_buff_create())) {
- spin_unlock(&curr_sb_lock);
+ if (unlikely(cp->af != AF_INET))
+ return;
+ /* Do not sync ONE PACKET */
+ if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+ return;
+
+ spin_lock(&ipvs->sync_buff_lock);
+ if (!ipvs->sync_buff) {
+ ipvs->sync_buff = ip_vs_sync_buff_create_v0(ipvs);
+ if (!ipvs->sync_buff) {
+ spin_unlock(&ipvs->sync_buff_lock);
pr_err("ip_vs_sync_buff_create failed.\n");
return;
}
@@ -253,10 +471,11 @@
len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
SIMPLE_CONN_SIZE;
- m = curr_sb->mesg;
- s = (struct ip_vs_sync_conn *)curr_sb->head;
+ m = (struct ip_vs_sync_mesg_v0 *)ipvs->sync_buff->mesg;
+ s = (struct ip_vs_sync_conn_v0 *)ipvs->sync_buff->head;
/* copy members */
+ s->reserved = 0;
s->protocol = cp->protocol;
s->cport = cp->cport;
s->vport = cp->vport;
@@ -274,83 +493,366 @@
m->nr_conns++;
m->size += len;
- curr_sb->head += len;
+ ipvs->sync_buff->head += len;
/* check if there is a space for next one */
- if (curr_sb->head+FULL_CONN_SIZE > curr_sb->end) {
- sb_queue_tail(curr_sb);
- curr_sb = NULL;
+ if (ipvs->sync_buff->head + FULL_CONN_SIZE > ipvs->sync_buff->end) {
+ sb_queue_tail(ipvs);
+ ipvs->sync_buff = NULL;
}
- spin_unlock(&curr_sb_lock);
+ spin_unlock(&ipvs->sync_buff_lock);
/* synchronize its controller if it has */
if (cp->control)
- ip_vs_sync_conn(cp->control);
+ ip_vs_sync_conn(net, cp->control);
}
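As a sketch of the version-0 wire record implied by the sender above: a fixed part (SIMPLE_CONN_SIZE), optionally followed by the in/out sequence options (FULL_CONN_SIZE). The field layout and ip_vs_seq contents below are assumptions, not the ip_vs.h definitions:

    #include <stdint.h>

    struct sync_seq {                      /* models struct ip_vs_seq */
        uint32_t init_seq;
        uint32_t delta;
        uint32_t previous_delta;
    };

    struct sync_conn_v0 {                  /* models struct ip_vs_sync_conn_v0 */
        uint8_t  reserved;
        uint8_t  protocol;
        uint16_t cport, vport, dport;      /* network order on the wire */
        uint32_t caddr, vaddr, daddr;
        uint16_t flags, state;
        /* optionally followed by: struct sync_seq in_seq, out_seq; */
    };

    enum {
        SIMPLE_CONN_SIZE = sizeof(struct sync_conn_v0),
        FULL_CONN_SIZE   = SIMPLE_CONN_SIZE + 2 * sizeof(struct sync_seq),
    };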
-static inline int
-ip_vs_conn_fill_param_sync(int af, int protocol,
- const union nf_inet_addr *caddr, __be16 cport,
- const union nf_inet_addr *vaddr, __be16 vport,
- struct ip_vs_conn_param *p)
+/*
+ * Add ip_vs_conn information into the current sync_buff.
+ * Called by ip_vs_in.
+ * Sending Version 1 messages
+ */
+void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
{
- /* XXX: Need to take into account persistence engine */
- ip_vs_conn_fill_param(af, protocol, caddr, cport, vaddr, vport, p);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ struct ip_vs_sync_mesg *m;
+ union ip_vs_sync_conn *s;
+ __u8 *p;
+ unsigned int len, pe_name_len, pad;
+
+ /* Handle old version of the protocol */
+ if (ipvs->sysctl_sync_ver == 0) {
+ ip_vs_sync_conn_v0(net, cp);
+ return;
+ }
+ /* Do not sync ONE PACKET */
+ if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+ goto control;
+sloop:
+ /* Sanity checks */
+ pe_name_len = 0;
+ if (cp->pe_data_len) {
+ if (!cp->pe_data || !cp->dest) {
+ IP_VS_ERR_RL("SYNC, connection pe_data invalid\n");
+ return;
+ }
+ pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
+ }
+
+ spin_lock(&ipvs->sync_buff_lock);
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6)
+ len = sizeof(struct ip_vs_sync_v6);
+ else
+#endif
+ len = sizeof(struct ip_vs_sync_v4);
+
+ if (cp->flags & IP_VS_CONN_F_SEQ_MASK)
+ len += sizeof(struct ip_vs_sync_conn_options) + 2;
+
+ if (cp->pe_data_len)
+ len += cp->pe_data_len + 2; /* + Param hdr field */
+ if (pe_name_len)
+ len += pe_name_len + 2;
+
+ /* check if there is a space for this one */
+ pad = 0;
+ if (ipvs->sync_buff) {
+ pad = (4 - (size_t)ipvs->sync_buff->head) & 3;
+ if (ipvs->sync_buff->head + len + pad > ipvs->sync_buff->end) {
+ sb_queue_tail(ipvs);
+ ipvs->sync_buff = NULL;
+ pad = 0;
+ }
+ }
+
+ if (!ipvs->sync_buff) {
+ ipvs->sync_buff = ip_vs_sync_buff_create(ipvs);
+ if (!ipvs->sync_buff) {
+ spin_unlock(&ipvs->sync_buff_lock);
+ pr_err("ip_vs_sync_buff_create failed.\n");
+ return;
+ }
+ }
+
+ m = ipvs->sync_buff->mesg;
+ p = ipvs->sync_buff->head;
+ ipvs->sync_buff->head += pad + len;
+ m->size += pad + len;
+ /* Add any padding left over from the previous sync_conn */
+ while (pad--)
+ *(p++) = 0;
+
+ s = (union ip_vs_sync_conn *)p;
+
+ /* Set message type & copy members */
+ s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0);
+ s->v4.ver_size = htons(len & SVER_MASK); /* Version 0 */
+ s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED);
+ s->v4.state = htons(cp->state);
+ s->v4.protocol = cp->protocol;
+ s->v4.cport = cp->cport;
+ s->v4.vport = cp->vport;
+ s->v4.dport = cp->dport;
+ s->v4.fwmark = htonl(cp->fwmark);
+ s->v4.timeout = htonl(cp->timeout / HZ);
+ m->nr_conns++;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6) {
+ p += sizeof(struct ip_vs_sync_v6);
+ ipv6_addr_copy(&s->v6.caddr, &cp->caddr.in6);
+ ipv6_addr_copy(&s->v6.vaddr, &cp->vaddr.in6);
+ ipv6_addr_copy(&s->v6.daddr, &cp->daddr.in6);
+ } else
+#endif
+ {
+ p += sizeof(struct ip_vs_sync_v4); /* options ptr */
+ s->v4.caddr = cp->caddr.ip;
+ s->v4.vaddr = cp->vaddr.ip;
+ s->v4.daddr = cp->daddr.ip;
+ }
+ if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
+ *(p++) = IPVS_OPT_SEQ_DATA;
+ *(p++) = sizeof(struct ip_vs_sync_conn_options);
+ hton_seq((struct ip_vs_seq *)p, &cp->in_seq);
+ p += sizeof(struct ip_vs_seq);
+ hton_seq((struct ip_vs_seq *)p, &cp->out_seq);
+ p += sizeof(struct ip_vs_seq);
+ }
+ /* Handle pe data */
+ if (cp->pe_data_len && cp->pe_data) {
+ *(p++) = IPVS_OPT_PE_DATA;
+ *(p++) = cp->pe_data_len;
+ memcpy(p, cp->pe_data, cp->pe_data_len);
+ p += cp->pe_data_len;
+ if (pe_name_len) {
+ /* Add PE_NAME */
+ *(p++) = IPVS_OPT_PE_NAME;
+ *(p++) = pe_name_len;
+ memcpy(p, cp->pe->name, pe_name_len);
+ p += pe_name_len;
+ }
+ }
+
+ spin_unlock(&ipvs->sync_buff_lock);
+
+control:
+ /* synchronize its controller if it has */
+ cp = cp->control;
+ if (!cp)
+ return;
+ /*
+ * Reduce sync rate for templates,
+ * i.e. only increment in_pkts for templates.
+ */
+ if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
+ int pkts = atomic_add_return(1, &cp->in_pkts);
+
+ if (pkts % ipvs->sysctl_sync_threshold[1] != 1)
+ return;
+ }
+ goto sloop;
+}
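The v1 sender keeps every record 32-bit aligned: `pad = (4 - (size_t)head) & 3` computes how many filler bytes are needed to reach the next 4-byte boundary (0 if already aligned). The same arithmetic in isolation:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uintptr_t a;

        /* pad is the distance to the next 4-byte boundary */
        for (a = 0; a < 8; a++)
            printf("addr%%4 == %lu -> pad %lu\n",
                   (unsigned long)(a & 3), (unsigned long)((4 - a) & 3));
        return 0;
    }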
+
+/*
+ * fill_param used by version 1
+ */
+static inline int
+ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc,
+ struct ip_vs_conn_param *p,
+ __u8 *pe_data, unsigned int pe_data_len,
+ __u8 *pe_name, unsigned int pe_name_len)
+{
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ ip_vs_conn_fill_param(net, af, sc->v6.protocol,
+ (const union nf_inet_addr *)&sc->v6.caddr,
+ sc->v6.cport,
+ (const union nf_inet_addr *)&sc->v6.vaddr,
+ sc->v6.vport, p);
+ else
+#endif
+ ip_vs_conn_fill_param(net, af, sc->v4.protocol,
+ (const union nf_inet_addr *)&sc->v4.caddr,
+ sc->v4.cport,
+ (const union nf_inet_addr *)&sc->v4.vaddr,
+ sc->v4.vport, p);
+ /* Handle pe data */
+ if (pe_data_len) {
+ if (pe_name_len) {
+ char buff[IP_VS_PENAME_MAXLEN+1];
+
+ memcpy(buff, pe_name, pe_name_len);
+ buff[pe_name_len] = 0;
+ p->pe = __ip_vs_pe_getbyname(buff);
+ if (!p->pe) {
+ IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n",
+ buff);
+ return 1;
+ }
+ } else {
+ IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n");
+ return 1;
+ }
+
+ p->pe_data = kmalloc(pe_data_len, GFP_ATOMIC);
+ if (!p->pe_data) {
+ if (p->pe->module)
+ module_put(p->pe->module);
+ return -ENOMEM;
+ }
+ memcpy(p->pe_data, pe_data, pe_data_len);
+ p->pe_data_len = pe_data_len;
+ }
return 0;
}
/*
- * Process received multicast message and create the corresponding
- * ip_vs_conn entries.
+ * Connection Add / Update.
+ * Common for version 0 and 1 reception of backup sync_conns.
+ * Param: ...
+ * timeout is in seconds.
*/
-static void ip_vs_process_message(const char *buffer, const size_t buflen)
+static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
+ unsigned int flags, unsigned int state,
+ unsigned int protocol, unsigned int type,
+ const union nf_inet_addr *daddr, __be16 dport,
+ unsigned long timeout, __u32 fwmark,
+ struct ip_vs_sync_conn_options *opt)
{
- struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer;
- struct ip_vs_sync_conn *s;
- struct ip_vs_sync_conn_options *opt;
- struct ip_vs_conn *cp;
- struct ip_vs_protocol *pp;
struct ip_vs_dest *dest;
+ struct ip_vs_conn *cp;
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ if (!(flags & IP_VS_CONN_F_TEMPLATE))
+ cp = ip_vs_conn_in_get(param);
+ else
+ cp = ip_vs_ct_in_get(param);
+
+ if (cp && param->pe_data) /* Free pe_data */
+ kfree(param->pe_data);
+ if (!cp) {
+ /*
+ * Find the appropriate destination for the connection.
+ * If it is not found the connection will remain unbound
+ * but still handled.
+ */
+ dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,
+ param->vport, protocol, fwmark);
+
+ /* Set the appropriate activity flag */
+ if (protocol == IPPROTO_TCP) {
+ if (state != IP_VS_TCP_S_ESTABLISHED)
+ flags |= IP_VS_CONN_F_INACTIVE;
+ else
+ flags &= ~IP_VS_CONN_F_INACTIVE;
+ } else if (protocol == IPPROTO_SCTP) {
+ if (state != IP_VS_SCTP_S_ESTABLISHED)
+ flags |= IP_VS_CONN_F_INACTIVE;
+ else
+ flags &= ~IP_VS_CONN_F_INACTIVE;
+ }
+ cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark);
+ if (dest)
+ atomic_dec(&dest->refcnt);
+ if (!cp) {
+ if (param->pe_data)
+ kfree(param->pe_data);
+ IP_VS_DBG(2, "BACKUP, add new conn. failed\n");
+ return;
+ }
+ } else if (!cp->dest) {
+ dest = ip_vs_try_bind_dest(cp);
+ if (dest)
+ atomic_dec(&dest->refcnt);
+ } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
+ (cp->state != state)) {
+ /* update active/inactive flag for the connection */
+ dest = cp->dest;
+ if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
+ (state != IP_VS_TCP_S_ESTABLISHED)) {
+ atomic_dec(&dest->activeconns);
+ atomic_inc(&dest->inactconns);
+ cp->flags |= IP_VS_CONN_F_INACTIVE;
+ } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
+ (state == IP_VS_TCP_S_ESTABLISHED)) {
+ atomic_inc(&dest->activeconns);
+ atomic_dec(&dest->inactconns);
+ cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+ }
+ } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) &&
+ (cp->state != state)) {
+ dest = cp->dest;
+ if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
+ (state != IP_VS_SCTP_S_ESTABLISHED)) {
+ atomic_dec(&dest->activeconns);
+ atomic_inc(&dest->inactconns);
+ cp->flags |= IP_VS_CONN_F_INACTIVE;
+ }
+ }
+
+ if (opt)
+ memcpy(&cp->in_seq, opt, sizeof(*opt));
+ atomic_set(&cp->in_pkts, ipvs->sysctl_sync_threshold[0]);
+ cp->state = state;
+ cp->old_state = cp->state;
+ /*
+ * For version 0 messages:
+ * - it is not possible to recover the right timeout for templates;
+ * - we cannot find the right fwmark virtual service.
+ *   If needed, we can do it for non-fwmark persistent services.
+ * Version 1 messages have no such problem.
+ */
+ if (timeout) {
+ if (timeout > MAX_SCHEDULE_TIMEOUT / HZ)
+ timeout = MAX_SCHEDULE_TIMEOUT / HZ;
+ cp->timeout = timeout * HZ;
+ } else {
+ struct ip_vs_proto_data *pd;
+
+ pd = ip_vs_proto_data_get(net, protocol);
+ if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table)
+ cp->timeout = pd->timeout_table[state];
+ else
+ cp->timeout = (3*60*HZ);
+ }
+ ip_vs_conn_put(cp);
+}
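ip_vs_proc_conn() clamps the peer-supplied timeout (in seconds) before scaling it to jiffies, so the multiplication by HZ cannot overflow. A standalone sketch of that clamp, with illustrative stand-ins for HZ and MAX_SCHEDULE_TIMEOUT:

    #include <limits.h>

    #define HZ 1000                        /* illustrative */
    #define MAX_SCHEDULE_TIMEOUT LONG_MAX  /* as in the kernel */

    static unsigned long timeout_to_jiffies(unsigned long timeout_sec)
    {
        /* bound first, so timeout_sec * HZ cannot wrap */
        if (timeout_sec > MAX_SCHEDULE_TIMEOUT / HZ)
            timeout_sec = MAX_SCHEDULE_TIMEOUT / HZ;
        return timeout_sec * HZ;
    }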
+
+/*
+ * Process received multicast message for Version 0
+ */
+static void ip_vs_process_message_v0(struct net *net, const char *buffer,
+ const size_t buflen)
+{
+ struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer;
+ struct ip_vs_sync_conn_v0 *s;
+ struct ip_vs_sync_conn_options *opt;
+ struct ip_vs_protocol *pp;
struct ip_vs_conn_param param;
char *p;
int i;
- if (buflen < sizeof(struct ip_vs_sync_mesg)) {
- IP_VS_ERR_RL("sync message header too short\n");
- return;
- }
-
- /* Convert size back to host byte order */
- m->size = ntohs(m->size);
-
- if (buflen != m->size) {
- IP_VS_ERR_RL("bogus sync message size\n");
- return;
- }
-
- /* SyncID sanity check */
- if (ip_vs_backup_syncid != 0 && m->syncid != ip_vs_backup_syncid) {
- IP_VS_DBG(7, "Ignoring incoming msg with syncid = %d\n",
- m->syncid);
- return;
- }
-
- p = (char *)buffer + sizeof(struct ip_vs_sync_mesg);
+ p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0);
for (i=0; i<m->nr_conns; i++) {
unsigned flags, state;
if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
- IP_VS_ERR_RL("bogus conn in sync message\n");
+ IP_VS_ERR_RL("BACKUP v0, bogus conn\n");
return;
}
- s = (struct ip_vs_sync_conn *) p;
+ s = (struct ip_vs_sync_conn_v0 *) p;
flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
flags &= ~IP_VS_CONN_F_HASHED;
if (flags & IP_VS_CONN_F_SEQ_MASK) {
opt = (struct ip_vs_sync_conn_options *)&s[1];
p += FULL_CONN_SIZE;
if (p > buffer+buflen) {
- IP_VS_ERR_RL("bogus conn options in sync message\n");
+ IP_VS_ERR_RL("BACKUP v0, Dropping buffer bogus conn options\n");
return;
}
} else {
@@ -362,118 +864,286 @@
if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
pp = ip_vs_proto_get(s->protocol);
if (!pp) {
- IP_VS_ERR_RL("Unsupported protocol %u in sync msg\n",
+ IP_VS_DBG(2, "BACKUP v0, Unsupported protocol %u\n",
s->protocol);
continue;
}
if (state >= pp->num_states) {
- IP_VS_DBG(2, "Invalid %s state %u in sync msg\n",
+ IP_VS_DBG(2, "BACKUP v0, Invalid %s state %u\n",
pp->name, state);
continue;
}
} else {
/* protocol in templates is not used for state/timeout */
- pp = NULL;
if (state > 0) {
- IP_VS_DBG(2, "Invalid template state %u in sync msg\n",
+ IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n",
state);
state = 0;
}
}
- {
- if (ip_vs_conn_fill_param_sync(AF_INET, s->protocol,
- (union nf_inet_addr *)&s->caddr,
- s->cport,
- (union nf_inet_addr *)&s->vaddr,
- s->vport, ¶m)) {
- pr_err("ip_vs_conn_fill_param_sync failed");
- return;
- }
- if (!(flags & IP_VS_CONN_F_TEMPLATE))
- cp = ip_vs_conn_in_get(¶m);
- else
- cp = ip_vs_ct_in_get(¶m);
- }
- if (!cp) {
- /*
- * Find the appropriate destination for the connection.
- * If it is not found the connection will remain unbound
- * but still handled.
- */
- dest = ip_vs_find_dest(AF_INET,
- (union nf_inet_addr *)&s->daddr,
- s->dport,
- (union nf_inet_addr *)&s->vaddr,
- s->vport,
- s->protocol);
- /* Set the approprite ativity flag */
- if (s->protocol == IPPROTO_TCP) {
- if (state != IP_VS_TCP_S_ESTABLISHED)
- flags |= IP_VS_CONN_F_INACTIVE;
- else
- flags &= ~IP_VS_CONN_F_INACTIVE;
- } else if (s->protocol == IPPROTO_SCTP) {
- if (state != IP_VS_SCTP_S_ESTABLISHED)
- flags |= IP_VS_CONN_F_INACTIVE;
- else
- flags &= ~IP_VS_CONN_F_INACTIVE;
- }
- cp = ip_vs_conn_new(¶m,
- (union nf_inet_addr *)&s->daddr,
- s->dport, flags, dest);
- if (dest)
- atomic_dec(&dest->refcnt);
- if (!cp) {
- pr_err("ip_vs_conn_new failed\n");
- return;
- }
- } else if (!cp->dest) {
- dest = ip_vs_try_bind_dest(cp);
- if (dest)
- atomic_dec(&dest->refcnt);
- } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
- (cp->state != state)) {
- /* update active/inactive flag for the connection */
- dest = cp->dest;
- if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
- (state != IP_VS_TCP_S_ESTABLISHED)) {
- atomic_dec(&dest->activeconns);
- atomic_inc(&dest->inactconns);
- cp->flags |= IP_VS_CONN_F_INACTIVE;
- } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
- (state == IP_VS_TCP_S_ESTABLISHED)) {
- atomic_inc(&dest->activeconns);
- atomic_dec(&dest->inactconns);
- cp->flags &= ~IP_VS_CONN_F_INACTIVE;
- }
- } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) &&
- (cp->state != state)) {
- dest = cp->dest;
- if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
- (state != IP_VS_SCTP_S_ESTABLISHED)) {
- atomic_dec(&dest->activeconns);
- atomic_inc(&dest->inactconns);
- cp->flags &= ~IP_VS_CONN_F_INACTIVE;
- }
- }
+ ip_vs_conn_fill_param(net, AF_INET, s->protocol,
+ (const union nf_inet_addr *)&s->caddr,
+ s->cport,
+ (const union nf_inet_addr *)&s->vaddr,
+ s->vport, ¶m);
- if (opt)
- memcpy(&cp->in_seq, opt, sizeof(*opt));
- atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]);
- cp->state = state;
- cp->old_state = cp->state;
- /*
- * We can not recover the right timeout for templates
- * in all cases, we can not find the right fwmark
- * virtual service. If needed, we can do it for
- * non-fwmark persistent services.
- */
- if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table)
- cp->timeout = pp->timeout_table[state];
- else
- cp->timeout = (3*60*HZ);
- ip_vs_conn_put(cp);
+ /* Send timeout as zero */
+ ip_vs_proc_conn(net, ¶m, flags, state, s->protocol, AF_INET,
+ (union nf_inet_addr *)&s->daddr, s->dport,
+ 0, 0, opt);
+ }
+}
+
+/*
+ * Handle options
+ */
+static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen,
+ __u32 *opt_flags,
+ struct ip_vs_sync_conn_options *opt)
+{
+ struct ip_vs_sync_conn_options *topt;
+
+ topt = (struct ip_vs_sync_conn_options *)p;
+
+ if (plen != sizeof(struct ip_vs_sync_conn_options)) {
+ IP_VS_DBG(2, "BACKUP, bogus conn options length\n");
+ return -EINVAL;
+ }
+ if (*opt_flags & IPVS_OPT_F_SEQ_DATA) {
+ IP_VS_DBG(2, "BACKUP, conn options found twice\n");
+ return -EINVAL;
+ }
+ ntoh_seq(&topt->in_seq, &opt->in_seq);
+ ntoh_seq(&topt->out_seq, &opt->out_seq);
+ *opt_flags |= IPVS_OPT_F_SEQ_DATA;
+ return 0;
+}
+
+static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len,
+ __u8 **data, unsigned int maxlen,
+ __u32 *opt_flags, __u32 flag)
+{
+ if (plen > maxlen) {
+ IP_VS_DBG(2, "BACKUP, bogus par.data len > %d\n", maxlen);
+ return -EINVAL;
+ }
+ if (*opt_flags & flag) {
+ IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag);
+ return -EINVAL;
+ }
+ *data_len = plen;
+ *data = p;
+ *opt_flags |= flag;
+ return 0;
+}
+/*
+ * Process a Version 1 sync. connection
+ */
+static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end)
+{
+ struct ip_vs_sync_conn_options opt;
+ union ip_vs_sync_conn *s;
+ struct ip_vs_protocol *pp;
+ struct ip_vs_conn_param param;
+ __u32 flags;
+ unsigned int af, state, pe_data_len = 0, pe_name_len = 0;
+ __u8 *pe_data = NULL, *pe_name = NULL;
+ __u32 opt_flags = 0;
+ int retc = 0;
+
+ s = (union ip_vs_sync_conn *) p;
+
+ if (s->v6.type & STYPE_F_INET6) {
+#ifdef CONFIG_IP_VS_IPV6
+ af = AF_INET6;
+ p += sizeof(struct ip_vs_sync_v6);
+#else
+ IP_VS_DBG(3,"BACKUP, IPv6 msg received, and IPVS is not compiled for IPv6\n");
+ retc = 10;
+ goto out;
+#endif
+ } else if (!s->v4.type) {
+ af = AF_INET;
+ p += sizeof(struct ip_vs_sync_v4);
+ } else {
+ return -10;
+ }
+ if (p > msg_end)
+ return -20;
+
+ /* Process optional params; check type & length */
+ while (p < msg_end) {
+ int ptype;
+ int plen;
+
+ if (p+2 > msg_end)
+ return -30;
+ ptype = *(p++);
+ plen = *(p++);
+
+ if (!plen || ((p + plen) > msg_end))
+ return -40;
+ /* Handle seq option; p points at param data */
+ switch (ptype & ~IPVS_OPT_F_PARAM) {
+ case IPVS_OPT_SEQ_DATA:
+ if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt))
+ return -50;
+ break;
+
+ case IPVS_OPT_PE_DATA:
+ if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data,
+ IP_VS_PEDATA_MAXLEN, &opt_flags,
+ IPVS_OPT_F_PE_DATA))
+ return -60;
+ break;
+
+ case IPVS_OPT_PE_NAME:
+ if (ip_vs_proc_str(p, plen, &pe_name_len, &pe_name,
+ IP_VS_PENAME_MAXLEN, &opt_flags,
+ IPVS_OPT_F_PE_NAME))
+ return -70;
+ break;
+
+ default:
+ /* Is this param mandatory? */
+ if (!(ptype & IPVS_OPT_F_PARAM)) {
+ IP_VS_DBG(3, "BACKUP, Unknown mandatory param %d found\n",
+ ptype & ~IPVS_OPT_F_PARAM);
+ retc = 20;
+ goto out;
+ }
+ }
+ p += plen; /* Next option */
+ }
+
+ /* Get flags and Mask off unsupported */
+ flags = ntohl(s->v4.flags) & IP_VS_CONN_F_BACKUP_MASK;
+ flags |= IP_VS_CONN_F_SYNC;
+ state = ntohs(s->v4.state);
+
+ if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
+ pp = ip_vs_proto_get(s->v4.protocol);
+ if (!pp) {
+ IP_VS_DBG(3,"BACKUP, Unsupported protocol %u\n",
+ s->v4.protocol);
+ retc = 30;
+ goto out;
+ }
+ if (state >= pp->num_states) {
+ IP_VS_DBG(3, "BACKUP, Invalid %s state %u\n",
+ pp->name, state);
+ retc = 40;
+ goto out;
+ }
+ } else {
+ /* protocol in templates is not used for state/timeout */
+ if (state > 0) {
+ IP_VS_DBG(3, "BACKUP, Invalid template state %u\n",
+ state);
+ state = 0;
+ }
+ }
+ if (ip_vs_conn_fill_param_sync(net, af, s, ¶m, pe_data,
+ pe_data_len, pe_name, pe_name_len)) {
+ retc = 50;
+ goto out;
+ }
+ /* If compiled for IPv4 only, silently skip IPv6 entries */
+ if (af == AF_INET)
+ ip_vs_proc_conn(net, ¶m, flags, state, s->v4.protocol, af,
+ (union nf_inet_addr *)&s->v4.daddr, s->v4.dport,
+ ntohl(s->v4.timeout), ntohl(s->v4.fwmark),
+ (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
+ );
+#ifdef CONFIG_IP_VS_IPV6
+ else
+ ip_vs_proc_conn(net, ¶m, flags, state, s->v6.protocol, af,
+ (union nf_inet_addr *)&s->v6.daddr, s->v6.dport,
+ ntohl(s->v6.timeout), ntohl(s->v6.fwmark),
+ (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
+ );
+#endif
+ return 0;
+ /* Error exit */
+out:
+ IP_VS_DBG(2, "BACKUP, Single msg dropped err:%d\n", retc);
+ return retc;
+
+}
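The option area after the fixed part of each v1 record is a simple type/length/value stream: a one-byte type, a one-byte length, then the payload. Unknown options abort decoding only when the "optional param" bit is clear. A minimal standalone model of that walk (the bit value is an assumption):

    #include <stdint.h>

    #define OPT_F_PARAM 0x80               /* assumed: set => may be skipped */

    static int walk_options(const uint8_t *p, const uint8_t *end)
    {
        while (p < end) {
            uint8_t type, len;

            if (p + 2 > end)
                return -1;                 /* truncated option header */
            type = *p++;
            len = *p++;
            if (!len || p + len > end)
                return -1;                 /* truncated option payload */
            switch (type & ~OPT_F_PARAM) {
            /* known option types (seq data, pe data, pe name) go here */
            default:
                if (!(type & OPT_F_PARAM))
                    return -1;             /* unknown mandatory option */
            }
            p += len;                      /* next option */
        }
        return 0;
    }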
+/*
+ * Process received multicast message and create the corresponding
+ * ip_vs_conn entries.
+ * Handles Version 0 & 1
+ */
+static void ip_vs_process_message(struct net *net, __u8 *buffer,
+ const size_t buflen)
+{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer;
+ __u8 *p, *msg_end;
+ int i, nr_conns;
+
+ if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) {
+ IP_VS_DBG(2, "BACKUP, message header too short\n");
+ return;
+ }
+ /* Convert size back to host byte order */
+ m2->size = ntohs(m2->size);
+
+ if (buflen != m2->size) {
+ IP_VS_DBG(2, "BACKUP, bogus message size\n");
+ return;
+ }
+ /* SyncID sanity check */
+ if (ipvs->backup_syncid != 0 && m2->syncid != ipvs->backup_syncid) {
+ IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid);
+ return;
+ }
+ /* Handle version 1 message */
+ if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0) &&
+ (m2->spare == 0)) {
+
+ msg_end = buffer + sizeof(struct ip_vs_sync_mesg);
+ nr_conns = m2->nr_conns;
+
+ for (i = 0; i < nr_conns; i++) {
+ union ip_vs_sync_conn *s;
+ unsigned size;
+ int retc;
+
+ p = msg_end;
+ if (p + sizeof(s->v4) > buffer+buflen) {
+ IP_VS_ERR_RL("BACKUP, Dropping buffer, to small\n");
+ return;
+ }
+ s = (union ip_vs_sync_conn *)p;
+ size = ntohs(s->v4.ver_size) & SVER_MASK;
+ msg_end = p + size;
+ /* Basic sanity checks */
+ if (msg_end > buffer+buflen) {
+ IP_VS_ERR_RL("BACKUP, Dropping buffer, msg > buffer\n");
+ return;
+ }
+ if (ntohs(s->v4.ver_size) >> SVER_SHIFT) {
+ IP_VS_ERR_RL("BACKUP, Dropping buffer, Unknown version %d\n",
+ ntohs(s->v4.ver_size) >> SVER_SHIFT);
+ return;
+ }
+ /* Process a single sync_conn */
+ retc = ip_vs_proc_sync_conn(net, p, msg_end);
+ if (retc < 0) {
+ IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n",
+ retc);
+ return;
+ }
+ /* Make sure we have 32 bit alignment */
+ msg_end = p + ((size + 3) & ~3);
+ }
+ } else {
+ /* Old type of message */
+ ip_vs_process_message_v0(net, buffer, buflen);
+ return;
}
}
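The dispatch above works because the v1 header reuses the byte that was nr_conns in v0 as `reserved`, which a v1 sender always zeroes; together with the version and spare checks this tells the two formats apart. In terms of the hypothetical sync_mesg_v1 model sketched earlier:

    /* uses struct sync_mesg_v1 and SYNC_PROTO_VER from the sketch above */
    static int is_v1_header(const struct sync_mesg_v1 *m)
    {
        return m->version == SYNC_PROTO_VER &&
               m->reserved == 0 && m->spare == 0;
    }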
@@ -511,8 +1181,10 @@
{
struct net_device *dev;
struct inet_sock *inet = inet_sk(sk);
+ struct net *net = sock_net(sk);
- if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+ dev = __dev_get_by_name(net, ifname);
+ if (!dev)
return -ENODEV;
if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
@@ -531,30 +1203,33 @@
* Set the maximum length of sync message according to the
* specified interface's MTU.
*/
-static int set_sync_mesg_maxlen(int sync_state)
+static int set_sync_mesg_maxlen(struct net *net, int sync_state)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
struct net_device *dev;
int num;
if (sync_state == IP_VS_STATE_MASTER) {
- if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL)
+ dev = __dev_get_by_name(net, ipvs->master_mcast_ifn);
+ if (!dev)
return -ENODEV;
num = (dev->mtu - sizeof(struct iphdr) -
sizeof(struct udphdr) -
SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE;
- sync_send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
+ ipvs->send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF);
IP_VS_DBG(7, "setting the maximum length of sync sending "
- "message %d.\n", sync_send_mesg_maxlen);
+ "message %d.\n", ipvs->send_mesg_maxlen);
} else if (sync_state == IP_VS_STATE_BACKUP) {
- if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL)
+ dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn);
+ if (!dev)
return -ENODEV;
- sync_recv_mesg_maxlen = dev->mtu -
+ ipvs->recv_mesg_maxlen = dev->mtu -
sizeof(struct iphdr) - sizeof(struct udphdr);
IP_VS_DBG(7, "setting the maximum length of sync receiving "
- "message %d.\n", sync_recv_mesg_maxlen);
+ "message %d.\n", ipvs->recv_mesg_maxlen);
}
return 0;
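The master-side sizing above fits as many SIMPLE_CONN_SIZE records under the interface MTU as possible, leaving room for the IP/UDP headers, the sync header, and a small slack. Worked numbers under assumed constants (all values below are illustrative, not the kernel's):

    #include <stdio.h>

    #define IPHDR_LEN              20
    #define UDPHDR_LEN             8
    #define SYNC_MESG_HEADER_LEN   4       /* assumed v0 header size */
    #define SIMPLE_CONN_SIZE       24      /* assumed */
    #define MAX_CONNS_PER_SYNCBUFF 255     /* assumed cap (nr_conns is 8-bit) */

    int main(void)
    {
        int mtu = 1500;
        int num = (mtu - IPHDR_LEN - UDPHDR_LEN -
                   SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE;
        int cap = num < MAX_CONNS_PER_SYNCBUFF ? num : MAX_CONNS_PER_SYNCBUFF;
        int maxlen = SYNC_MESG_HEADER_LEN + SIMPLE_CONN_SIZE * cap;

        printf("num=%d maxlen=%d\n", num, maxlen); /* num=60, maxlen=1444 */
        return 0;
    }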
@@ -569,6 +1244,7 @@
static int
join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
{
+ struct net *net = sock_net(sk);
struct ip_mreqn mreq;
struct net_device *dev;
int ret;
@@ -576,7 +1252,8 @@
memset(&mreq, 0, sizeof(mreq));
memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
- if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+ dev = __dev_get_by_name(net, ifname);
+ if (!dev)
return -ENODEV;
if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
return -EINVAL;
@@ -593,11 +1270,13 @@
static int bind_mcastif_addr(struct socket *sock, char *ifname)
{
+ struct net *net = sock_net(sock->sk);
struct net_device *dev;
__be32 addr;
struct sockaddr_in sin;
- if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+ dev = __dev_get_by_name(net, ifname);
+ if (!dev)
return -ENODEV;
addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
@@ -619,8 +1298,9 @@
/*
* Set up sending multicast socket over UDP
*/
-static struct socket * make_send_sock(void)
+static struct socket *make_send_sock(struct net *net)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
struct socket *sock;
int result;
@@ -631,7 +1311,7 @@
return ERR_PTR(result);
}
- result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn);
+ result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn);
if (result < 0) {
pr_err("Error setting outbound mcast interface\n");
goto error;
@@ -640,7 +1320,7 @@
set_mcast_loop(sock->sk, 0);
set_mcast_ttl(sock->sk, 1);
- result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn);
+ result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn);
if (result < 0) {
pr_err("Error binding address of the mcast interface\n");
goto error;
@@ -664,8 +1344,9 @@
/*
* Set up receiving multicast socket over UDP
*/
-static struct socket * make_receive_sock(void)
+static struct socket *make_receive_sock(struct net *net)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
struct socket *sock;
int result;
@@ -689,7 +1370,7 @@
/* join the multicast group */
result = join_mcast_group(sock->sk,
(struct in_addr *) &mcast_addr.sin_addr,
- ip_vs_backup_mcast_ifn);
+ ipvs->backup_mcast_ifn);
if (result < 0) {
pr_err("Error joining to the multicast group\n");
goto error;
@@ -760,20 +1441,21 @@
static int sync_thread_master(void *data)
{
struct ip_vs_sync_thread_data *tinfo = data;
+ struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
struct ip_vs_sync_buff *sb;
pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
"syncid = %d\n",
- ip_vs_master_mcast_ifn, ip_vs_master_syncid);
+ ipvs->master_mcast_ifn, ipvs->master_syncid);
while (!kthread_should_stop()) {
- while ((sb = sb_dequeue())) {
+ while ((sb = sb_dequeue(ipvs))) {
ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
ip_vs_sync_buff_release(sb);
}
- /* check if entries stay in curr_sb for 2 seconds */
- sb = get_curr_sync_buff(2 * HZ);
+ /* check if entries stay in ipvs->sync_buff for 2 seconds */
+ sb = get_curr_sync_buff(ipvs, 2 * HZ);
if (sb) {
ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
ip_vs_sync_buff_release(sb);
@@ -783,14 +1465,13 @@
}
/* clean up the sync_buff queue */
- while ((sb=sb_dequeue())) {
+ while ((sb = sb_dequeue(ipvs)))
ip_vs_sync_buff_release(sb);
- }
/* clean up the current sync_buff */
- if ((sb = get_curr_sync_buff(0))) {
+ sb = get_curr_sync_buff(ipvs, 0);
+ if (sb)
ip_vs_sync_buff_release(sb);
- }
/* release the sending multicast socket */
sock_release(tinfo->sock);
@@ -803,11 +1484,12 @@
static int sync_thread_backup(void *data)
{
struct ip_vs_sync_thread_data *tinfo = data;
+ struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
int len;
pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
"syncid = %d\n",
- ip_vs_backup_mcast_ifn, ip_vs_backup_syncid);
+ ipvs->backup_mcast_ifn, ipvs->backup_syncid);
while (!kthread_should_stop()) {
wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
@@ -817,7 +1499,7 @@
/* do we have data now? */
while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
len = ip_vs_receive(tinfo->sock, tinfo->buf,
- sync_recv_mesg_maxlen);
+ ipvs->recv_mesg_maxlen);
if (len <= 0) {
pr_err("receiving message error\n");
break;
@@ -826,7 +1508,7 @@
/* disable bottom half, because it accesses the data
shared by softirq while getting/creating conns */
local_bh_disable();
- ip_vs_process_message(tinfo->buf, len);
+ ip_vs_process_message(tinfo->net, tinfo->buf, len);
local_bh_enable();
}
}
@@ -840,41 +1522,42 @@
}
-int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
+int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
{
struct ip_vs_sync_thread_data *tinfo;
struct task_struct **realtask, *task;
struct socket *sock;
+ struct netns_ipvs *ipvs = net_ipvs(net);
char *name, *buf = NULL;
int (*threadfn)(void *data);
int result = -ENOMEM;
IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
- sizeof(struct ip_vs_sync_conn));
+ sizeof(struct ip_vs_sync_conn_v0));
if (state == IP_VS_STATE_MASTER) {
- if (sync_master_thread)
+ if (ipvs->master_thread)
return -EEXIST;
- strlcpy(ip_vs_master_mcast_ifn, mcast_ifn,
- sizeof(ip_vs_master_mcast_ifn));
- ip_vs_master_syncid = syncid;
- realtask = &sync_master_thread;
- name = "ipvs_syncmaster";
+ strlcpy(ipvs->master_mcast_ifn, mcast_ifn,
+ sizeof(ipvs->master_mcast_ifn));
+ ipvs->master_syncid = syncid;
+ realtask = &ipvs->master_thread;
+ name = "ipvs_master:%d";
threadfn = sync_thread_master;
- sock = make_send_sock();
+ sock = make_send_sock(net);
} else if (state == IP_VS_STATE_BACKUP) {
- if (sync_backup_thread)
+ if (ipvs->backup_thread)
return -EEXIST;
- strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn,
- sizeof(ip_vs_backup_mcast_ifn));
- ip_vs_backup_syncid = syncid;
- realtask = &sync_backup_thread;
- name = "ipvs_syncbackup";
+ strlcpy(ipvs->backup_mcast_ifn, mcast_ifn,
+ sizeof(ipvs->backup_mcast_ifn));
+ ipvs->backup_syncid = syncid;
+ realtask = &ipvs->backup_thread;
+ name = "ipvs_backup:%d";
threadfn = sync_thread_backup;
- sock = make_receive_sock();
+ sock = make_receive_sock(net);
} else {
return -EINVAL;
}
@@ -884,9 +1567,9 @@
goto out;
}
- set_sync_mesg_maxlen(state);
+ set_sync_mesg_maxlen(net, state);
if (state == IP_VS_STATE_BACKUP) {
- buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL);
+ buf = kmalloc(ipvs->recv_mesg_maxlen, GFP_KERNEL);
if (!buf)
goto outsocket;
}
@@ -895,10 +1578,11 @@
if (!tinfo)
goto outbuf;
+ tinfo->net = net;
tinfo->sock = sock;
tinfo->buf = buf;
- task = kthread_run(threadfn, tinfo, name);
+ task = kthread_run(threadfn, tinfo, name, ipvs->gen);
if (IS_ERR(task)) {
result = PTR_ERR(task);
goto outtinfo;
@@ -906,7 +1590,7 @@
/* mark as active */
*realtask = task;
- ip_vs_sync_state |= state;
+ ipvs->sync_state |= state;
/* increase the module use count */
ip_vs_use_count_inc();
@@ -924,16 +1608,18 @@
}
-int stop_sync_thread(int state)
+int stop_sync_thread(struct net *net, int state)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
if (state == IP_VS_STATE_MASTER) {
- if (!sync_master_thread)
+ if (!ipvs->master_thread)
return -ESRCH;
pr_info("stopping master sync thread %d ...\n",
- task_pid_nr(sync_master_thread));
+ task_pid_nr(ipvs->master_thread));
/*
* The lock synchronizes with sb_queue_tail(), so that we don't
@@ -941,21 +1627,21 @@
* progress of stopping the master sync daemon.
*/
- spin_lock_bh(&ip_vs_sync_lock);
- ip_vs_sync_state &= ~IP_VS_STATE_MASTER;
- spin_unlock_bh(&ip_vs_sync_lock);
- kthread_stop(sync_master_thread);
- sync_master_thread = NULL;
+ spin_lock_bh(&ipvs->sync_lock);
+ ipvs->sync_state &= ~IP_VS_STATE_MASTER;
+ spin_unlock_bh(&ipvs->sync_lock);
+ kthread_stop(ipvs->master_thread);
+ ipvs->master_thread = NULL;
} else if (state == IP_VS_STATE_BACKUP) {
- if (!sync_backup_thread)
+ if (!ipvs->backup_thread)
return -ESRCH;
pr_info("stopping backup sync thread %d ...\n",
- task_pid_nr(sync_backup_thread));
+ task_pid_nr(ipvs->backup_thread));
- ip_vs_sync_state &= ~IP_VS_STATE_BACKUP;
- kthread_stop(sync_backup_thread);
- sync_backup_thread = NULL;
+ ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
+ kthread_stop(ipvs->backup_thread);
+ ipvs->backup_thread = NULL;
} else {
return -EINVAL;
}
@@ -965,3 +1651,42 @@
return 0;
}
+
+/*
+ * Initialize data struct for each netns
+ */
+static int __net_init __ip_vs_sync_init(struct net *net)
+{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ INIT_LIST_HEAD(&ipvs->sync_queue);
+ spin_lock_init(&ipvs->sync_lock);
+ spin_lock_init(&ipvs->sync_buff_lock);
+
+ ipvs->sync_mcast_addr.sin_family = AF_INET;
+ ipvs->sync_mcast_addr.sin_port = cpu_to_be16(IP_VS_SYNC_PORT);
+ ipvs->sync_mcast_addr.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP);
+ return 0;
+}
+
+static void __ip_vs_sync_cleanup(struct net *net)
+{
+ stop_sync_thread(net, IP_VS_STATE_MASTER);
+ stop_sync_thread(net, IP_VS_STATE_BACKUP);
+}
+
+static struct pernet_operations ipvs_sync_ops = {
+ .init = __ip_vs_sync_init,
+ .exit = __ip_vs_sync_cleanup,
+};
+
+
+int __init ip_vs_sync_init(void)
+{
+ return register_pernet_subsys(&ipvs_sync_ops);
+}
+
+void __exit ip_vs_sync_cleanup(void)
+{
+ unregister_pernet_subsys(&ipvs_sync_ops);
+}
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 5325a3f..1f2a4e3 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -175,7 +175,6 @@
.fl4_tos = RT_TOS(iph->tos),
.mark = skb->mark,
};
- struct rtable *rt;
if (ip_route_output_key(net, &rt, &fl))
return 0;
@@ -390,7 +389,8 @@
/* MTU checking */
mtu = dst_mtu(&rt->dst);
- if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
+ if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
+ !skb_is_gso(skb)) {
ip_rt_put(rt);
icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
IP_VS_DBG_RL("%s(): frag needed\n", __func__);
@@ -443,7 +443,7 @@
/* MTU checking */
mtu = dst_mtu(&rt->dst);
- if (skb->len > mtu) {
+ if (skb->len > mtu && !skb_is_gso(skb)) {
if (!skb->dev) {
struct net *net = dev_net(skb_dst(skb)->dev);
@@ -543,7 +543,8 @@
/* MTU checking */
mtu = dst_mtu(&rt->dst);
- if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
+ if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
+ !skb_is_gso(skb)) {
icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0,
"ip_vs_nat_xmit(): frag needed for");
@@ -658,7 +659,7 @@
/* MTU checking */
mtu = dst_mtu(&rt->dst);
- if (skb->len > mtu) {
+ if (skb->len > mtu && !skb_is_gso(skb)) {
if (!skb->dev) {
struct net *net = dev_net(skb_dst(skb)->dev);
@@ -773,8 +774,8 @@
df |= (old_iph->frag_off & htons(IP_DF));
- if ((old_iph->frag_off & htons(IP_DF))
- && mtu < ntohs(old_iph->tot_len)) {
+ if ((old_iph->frag_off & htons(IP_DF)) &&
+ mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb)) {
icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
IP_VS_DBG_RL("%s(): frag needed\n", __func__);
goto tx_error_put;
@@ -886,7 +887,8 @@
if (skb_dst(skb))
skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
- if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
+ if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr) &&
+ !skb_is_gso(skb)) {
if (!skb->dev) {
struct net *net = dev_net(skb_dst(skb)->dev);
@@ -991,7 +993,8 @@
/* MTU checking */
mtu = dst_mtu(&rt->dst);
- if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
+ if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu &&
+ !skb_is_gso(skb)) {
icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
ip_rt_put(rt);
IP_VS_DBG_RL("%s(): frag needed\n", __func__);
@@ -1158,7 +1161,8 @@
/* MTU checking */
mtu = dst_mtu(&rt->dst);
- if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
+ if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) &&
+ !skb_is_gso(skb)) {
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
IP_VS_DBG_RL("%s(): frag needed\n", __func__);
goto tx_error_put;
@@ -1272,7 +1276,7 @@
/* MTU checking */
mtu = dst_mtu(&rt->dst);
- if (skb->len > mtu) {
+ if (skb->len > mtu && !skb_is_gso(skb)) {
if (!skb->dev) {
struct net *net = dev_net(skb_dst(skb)->dev);
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
new file mode 100644
index 0000000..4e99cca
--- /dev/null
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -0,0 +1,82 @@
+/*
+ * broadcast connection tracking helper
+ *
+ * (c) 2005 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <net/route.h>
+#include <linux/inetdevice.h>
+#include <linux/skbuff.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+
+int nf_conntrack_broadcast_help(struct sk_buff *skb,
+ unsigned int protoff,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int timeout)
+{
+ struct nf_conntrack_expect *exp;
+ struct iphdr *iph = ip_hdr(skb);
+ struct rtable *rt = skb_rtable(skb);
+ struct in_device *in_dev;
+ struct nf_conn_help *help = nfct_help(ct);
+ __be32 mask = 0;
+
+ /* we're only interested in locally generated packets */
+ if (skb->sk == NULL)
+ goto out;
+ if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
+ goto out;
+ if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
+ goto out;
+
+ rcu_read_lock();
+ in_dev = __in_dev_get_rcu(rt->dst.dev);
+ if (in_dev != NULL) {
+ for_primary_ifa(in_dev) {
+ if (ifa->ifa_broadcast == iph->daddr) {
+ mask = ifa->ifa_mask;
+ break;
+ }
+ } endfor_ifa(in_dev);
+ }
+ rcu_read_unlock();
+
+ if (mask == 0)
+ goto out;
+
+ exp = nf_ct_expect_alloc(ct);
+ if (exp == NULL)
+ goto out;
+
+ exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+ exp->tuple.src.u.udp.port = help->helper->tuple.src.u.udp.port;
+
+ exp->mask.src.u3.ip = mask;
+ exp->mask.src.u.udp.port = htons(0xFFFF);
+
+ exp->expectfn = NULL;
+ exp->flags = NF_CT_EXPECT_PERMANENT;
+ exp->class = NF_CT_EXPECT_CLASS_DEFAULT;
+ exp->helper = NULL;
+
+ nf_ct_expect_related(exp);
+ nf_ct_expect_put(exp);
+
+ nf_ct_refresh(ct, skb, timeout * HZ);
+out:
+ return NF_ACCEPT;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_broadcast_help);
+
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index e615119..1909311 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -43,6 +43,7 @@
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_timestamp.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_core.h>
@@ -282,6 +283,11 @@
static void death_by_timeout(unsigned long ul_conntrack)
{
struct nf_conn *ct = (void *)ul_conntrack;
+ struct nf_conn_tstamp *tstamp;
+
+ tstamp = nf_conn_tstamp_find(ct);
+ if (tstamp && tstamp->stop == 0)
+ tstamp->stop = ktime_to_ns(ktime_get_real());
if (!test_bit(IPS_DYING_BIT, &ct->status) &&
unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) {
@@ -419,6 +425,7 @@
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
struct nf_conn_help *help;
+ struct nf_conn_tstamp *tstamp;
struct hlist_nulls_node *n;
enum ip_conntrack_info ctinfo;
struct net *net;
@@ -486,8 +493,16 @@
ct->timeout.expires += jiffies;
add_timer(&ct->timeout);
atomic_inc(&ct->ct_general.use);
- set_bit(IPS_CONFIRMED_BIT, &ct->status);
+ ct->status |= IPS_CONFIRMED;
+ /* set conntrack timestamp, if enabled. */
+ tstamp = nf_conn_tstamp_find(ct);
+ if (tstamp) {
+ if (skb->tstamp.tv64 == 0)
+ __net_timestamp((struct sk_buff *)skb);
+
+ tstamp->start = ktime_to_ns(skb->tstamp);
+ }
/* Since the lookup is lockless, hash insertion must be done after
* starting the timer and setting the CONFIRMED bit. The RCU barriers
* guarantee that no other CPU can find the conntrack before the above
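The timestamp extension added here records `start` at confirmation (from skb->tstamp) and `stop` at destruction, both as nanoseconds, so the connection lifetime is a plain subtraction. A userspace analogue of that bookkeeping:

    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    static uint64_t now_ns(void)
    {
        struct timespec ts;

        clock_gettime(CLOCK_REALTIME, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
    }

    int main(void)
    {
        uint64_t start = now_ns();         /* ~ tstamp->start at confirm */
        uint64_t stop = now_ns();          /* ~ tstamp->stop at destroy */

        printf("lifetime: %llu ns\n", (unsigned long long)(stop - start));
        return 0;
    }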
@@ -655,7 +670,8 @@
* and ct->tuplehash[IP_CT_DIR_REPLY].hnnode.next unchanged.
*/
memset(&ct->tuplehash[IP_CT_DIR_MAX], 0,
- sizeof(*ct) - offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX]));
+ offsetof(struct nf_conn, proto) -
+ offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX]));
spin_lock_init(&ct->lock);
ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
@@ -745,6 +761,7 @@
}
nf_ct_acct_ext_add(ct, GFP_ATOMIC);
+ nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL;
nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0,
@@ -1185,6 +1202,11 @@
static int kill_report(struct nf_conn *i, void *data)
{
struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data;
+ struct nf_conn_tstamp *tstamp;
+
+ tstamp = nf_conn_tstamp_find(i);
+ if (tstamp && tstamp->stop == 0)
+ tstamp->stop = ktime_to_ns(ktime_get_real());
/* If we fail to deliver the event, death_by_timeout() will retry */
if (nf_conntrack_event_report(IPCT_DESTROY, i,
@@ -1201,9 +1223,9 @@
return 1;
}
-void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size)
+void nf_ct_free_hashtable(void *hash, unsigned int size)
{
- if (vmalloced)
+ if (is_vmalloc_addr(hash))
vfree(hash);
else
free_pages((unsigned long)hash,
@@ -1270,8 +1292,7 @@
goto i_see_dead_people;
}
- nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
- net->ct.htable_size);
+ nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
nf_conntrack_ecache_fini(net);
nf_conntrack_acct_fini(net);
nf_conntrack_expect_fini(net);
@@ -1300,21 +1321,18 @@
}
}
-void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls)
+void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
{
struct hlist_nulls_head *hash;
unsigned int nr_slots, i;
size_t sz;
- *vmalloced = 0;
-
BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head));
nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head));
sz = nr_slots * sizeof(struct hlist_nulls_head);
hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
get_order(sz));
if (!hash) {
- *vmalloced = 1;
printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
hash = __vmalloc(sz, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
PAGE_KERNEL);
@@ -1330,7 +1348,7 @@
int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
{
- int i, bucket, vmalloced, old_vmalloced;
+ int i, bucket;
unsigned int hashsize, old_size;
struct hlist_nulls_head *hash, *old_hash;
struct nf_conntrack_tuple_hash *h;
@@ -1347,7 +1365,7 @@
if (!hashsize)
return -EINVAL;
- hash = nf_ct_alloc_hashtable(&hashsize, &vmalloced, 1);
+ hash = nf_ct_alloc_hashtable(&hashsize, 1);
if (!hash)
return -ENOMEM;
@@ -1369,15 +1387,13 @@
}
}
old_size = init_net.ct.htable_size;
- old_vmalloced = init_net.ct.hash_vmalloc;
old_hash = init_net.ct.hash;
init_net.ct.htable_size = nf_conntrack_htable_size = hashsize;
- init_net.ct.hash_vmalloc = vmalloced;
init_net.ct.hash = hash;
spin_unlock_bh(&nf_conntrack_lock);
- nf_ct_free_hashtable(old_hash, old_vmalloced, old_size);
+ nf_ct_free_hashtable(old_hash, old_size);
return 0;
}
EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
@@ -1490,8 +1506,7 @@
}
net->ct.htable_size = nf_conntrack_htable_size;
- net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size,
- &net->ct.hash_vmalloc, 1);
+ net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1);
if (!net->ct.hash) {
ret = -ENOMEM;
printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
@@ -1503,6 +1518,9 @@
ret = nf_conntrack_acct_init(net);
if (ret < 0)
goto err_acct;
+ ret = nf_conntrack_tstamp_init(net);
+ if (ret < 0)
+ goto err_tstamp;
ret = nf_conntrack_ecache_init(net);
if (ret < 0)
goto err_ecache;
@@ -1510,12 +1528,13 @@
return 0;
err_ecache:
+ nf_conntrack_tstamp_fini(net);
+err_tstamp:
nf_conntrack_acct_fini(net);
err_acct:
nf_conntrack_expect_fini(net);
err_expect:
- nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
- net->ct.htable_size);
+ nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
err_hash:
kmem_cache_destroy(net->ct.nf_conntrack_cachep);
err_cache:
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index a20fb0b..cd1e8e0 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -319,7 +319,8 @@
const struct nf_conntrack_expect_policy *p;
unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);
- atomic_inc(&exp->use);
+ /* two references : one for hash insert, one for the timer */
+ atomic_add(2, &exp->use);
if (master_help) {
hlist_add_head(&exp->lnode, &master_help->expectations);
@@ -333,12 +334,14 @@
setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
(unsigned long)exp);
if (master_help) {
- p = &master_help->helper->expect_policy[exp->class];
+ p = &rcu_dereference_protected(
+ master_help->helper,
+ lockdep_is_held(&nf_conntrack_lock)
+ )->expect_policy[exp->class];
exp->timeout.expires = jiffies + p->timeout * HZ;
}
add_timer(&exp->timeout);
- atomic_inc(&exp->use);
NF_CT_STAT_INC(net, expect_create);
}
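These hunks replace plain reads of RCU-managed pointers with rcu_dereference_protected(ptr, lockdep_is_held(&nf_conntrack_lock)), documenting that the read needs no RCU read-side critical section because the writer lock is held. A userspace analogue of the pattern, with an assertion standing in for lockdep:

    #include <assert.h>
    #include <pthread.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static int lock_held;                  /* stand-in for lockdep tracking */

    static void *deref_protected(void *const *slot)
    {
        assert(lock_held);                 /* models lockdep_is_held(&lock) */
        return *slot;
    }

    int main(void)
    {
        int v = 42;
        void *shared = &v;

        pthread_mutex_lock(&lock);
        lock_held = 1;
        assert(*(int *)deref_protected(&shared) == 42);
        lock_held = 0;
        pthread_mutex_unlock(&lock);
        return 0;
    }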
@@ -369,7 +372,10 @@
if (!del_timer(&i->timeout))
return 0;
- p = &master_help->helper->expect_policy[i->class];
+ p = &rcu_dereference_protected(
+ master_help->helper,
+ lockdep_is_held(&nf_conntrack_lock)
+ )->expect_policy[i->class];
i->timeout.expires = jiffies + p->timeout * HZ;
add_timer(&i->timeout);
return 1;
@@ -407,7 +413,10 @@
}
/* Will be over limit? */
if (master_help) {
- p = &master_help->helper->expect_policy[expect->class];
+ p = &rcu_dereference_protected(
+ master_help->helper,
+ lockdep_is_held(&nf_conntrack_lock)
+ )->expect_policy[expect->class];
if (p->max_expected &&
master_help->expecting[expect->class] >= p->max_expected) {
evict_oldest_expect(master, expect);
@@ -478,7 +487,7 @@
struct hlist_node *n;
for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
- n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+ n = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
if (n)
return n;
}
@@ -491,11 +500,11 @@
struct net *net = seq_file_net(seq);
struct ct_expect_iter_state *st = seq->private;
- head = rcu_dereference(head->next);
+ head = rcu_dereference(hlist_next_rcu(head));
while (head == NULL) {
if (++st->bucket >= nf_ct_expect_hsize)
return NULL;
- head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+ head = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
}
return head;
}
@@ -630,8 +639,7 @@
}
net->ct.expect_count = 0;
- net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
- &net->ct.expect_vmalloc, 0);
+ net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
if (net->ct.expect_hash == NULL)
goto err1;
@@ -653,8 +661,7 @@
if (net_eq(net, &init_net))
kmem_cache_destroy(nf_ct_expect_cachep);
err2:
- nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
- nf_ct_expect_hsize);
+ nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
err1:
return err;
}
@@ -666,6 +673,5 @@
rcu_barrier(); /* Wait for call_rcu() before destroy */
kmem_cache_destroy(nf_ct_expect_cachep);
}
- nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
- nf_ct_expect_hsize);
+ nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
}
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index bd82450..80a23ed 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -140,15 +140,16 @@
/* This assumes that extended areas in conntrack for the types
whose NF_CT_EXT_F_PREALLOC bit set are allocated in order */
for (i = min; i <= max; i++) {
- t1 = nf_ct_ext_types[i];
+ t1 = rcu_dereference_protected(nf_ct_ext_types[i],
+ lockdep_is_held(&nf_ct_ext_type_mutex));
if (!t1)
continue;
- t1->alloc_size = sizeof(struct nf_ct_ext)
- + ALIGN(sizeof(struct nf_ct_ext), t1->align)
- + t1->len;
+ t1->alloc_size = ALIGN(sizeof(struct nf_ct_ext), t1->align) +
+ t1->len;
for (j = 0; j < NF_CT_EXT_NUM; j++) {
- t2 = nf_ct_ext_types[j];
+ t2 = rcu_dereference_protected(nf_ct_ext_types[j],
+ lockdep_is_held(&nf_ct_ext_type_mutex));
if (t2 == NULL || t2 == t1 ||
(t2->flags & NF_CT_EXT_F_PREALLOC) == 0)
continue;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 59e1a4c..1bdfea3 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -33,7 +33,6 @@
static struct hlist_head *nf_ct_helper_hash __read_mostly;
static unsigned int nf_ct_helper_hsize __read_mostly;
static unsigned int nf_ct_helper_count __read_mostly;
-static int nf_ct_helper_vmalloc;
/* Stupid hash, but collision free for the default registrations of the
@@ -158,7 +157,10 @@
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
struct nf_conn_help *help = nfct_help(ct);
- if (help && help->helper == me) {
+ if (help && rcu_dereference_protected(
+ help->helper,
+ lockdep_is_held(&nf_conntrack_lock)
+ ) == me) {
nf_conntrack_event(IPCT_HELPER, ct);
rcu_assign_pointer(help->helper, NULL);
}
@@ -210,7 +212,10 @@
hlist_for_each_entry_safe(exp, n, next,
&net->ct.expect_hash[i], hnode) {
struct nf_conn_help *help = nfct_help(exp->master);
- if ((help->helper == me || exp->helper == me) &&
+ if ((rcu_dereference_protected(
+ help->helper,
+ lockdep_is_held(&nf_conntrack_lock)
+ ) == me || exp->helper == me) &&
del_timer(&exp->timeout)) {
nf_ct_unlink_expect(exp);
nf_ct_expect_put(exp);
@@ -261,8 +266,7 @@
int err;
nf_ct_helper_hsize = 1; /* gets rounded up to use one page */
- nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize,
- &nf_ct_helper_vmalloc, 0);
+ nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0);
if (!nf_ct_helper_hash)
return -ENOMEM;
@@ -273,14 +277,12 @@
return 0;
err1:
- nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc,
- nf_ct_helper_hsize);
+ nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
return err;
}
void nf_conntrack_helper_fini(void)
{
nf_ct_extend_unregister(&helper_extend);
- nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc,
- nf_ct_helper_hsize);
+ nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
}
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index aadde01..4c8f30a 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -18,14 +18,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/if_addr.h>
#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <net/route.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_helper.h>
@@ -40,75 +33,26 @@
MODULE_ALIAS_NFCT_HELPER("netbios_ns");
static unsigned int timeout __read_mostly = 3;
-module_param(timeout, uint, 0400);
+module_param(timeout, uint, S_IRUSR);
MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
-static int help(struct sk_buff *skb, unsigned int protoff,
- struct nf_conn *ct, enum ip_conntrack_info ctinfo)
-{
- struct nf_conntrack_expect *exp;
- struct iphdr *iph = ip_hdr(skb);
- struct rtable *rt = skb_rtable(skb);
- struct in_device *in_dev;
- __be32 mask = 0;
-
- /* we're only interested in locally generated packets */
- if (skb->sk == NULL)
- goto out;
- if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
- goto out;
- if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
- goto out;
-
- rcu_read_lock();
- in_dev = __in_dev_get_rcu(rt->dst.dev);
- if (in_dev != NULL) {
- for_primary_ifa(in_dev) {
- if (ifa->ifa_broadcast == iph->daddr) {
- mask = ifa->ifa_mask;
- break;
- }
- } endfor_ifa(in_dev);
- }
- rcu_read_unlock();
-
- if (mask == 0)
- goto out;
-
- exp = nf_ct_expect_alloc(ct);
- if (exp == NULL)
- goto out;
-
- exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
- exp->tuple.src.u.udp.port = htons(NMBD_PORT);
-
- exp->mask.src.u3.ip = mask;
- exp->mask.src.u.udp.port = htons(0xFFFF);
-
- exp->expectfn = NULL;
- exp->flags = NF_CT_EXPECT_PERMANENT;
- exp->class = NF_CT_EXPECT_CLASS_DEFAULT;
- exp->helper = NULL;
-
- nf_ct_expect_related(exp);
- nf_ct_expect_put(exp);
-
- nf_ct_refresh(ct, skb, timeout * HZ);
-out:
- return NF_ACCEPT;
-}
-
static struct nf_conntrack_expect_policy exp_policy = {
.max_expected = 1,
};
+static int netbios_ns_help(struct sk_buff *skb, unsigned int protoff,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+{
+ return nf_conntrack_broadcast_help(skb, protoff, ct, ctinfo, timeout);
+}
+
static struct nf_conntrack_helper helper __read_mostly = {
.name = "netbios-ns",
- .tuple.src.l3num = AF_INET,
+ .tuple.src.l3num = NFPROTO_IPV4,
.tuple.src.u.udp.port = cpu_to_be16(NMBD_PORT),
.tuple.dst.protonum = IPPROTO_UDP,
.me = THIS_MODULE,
- .help = help,
+ .help = netbios_ns_help,
.expect_policy = &exp_policy,
};
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 93297aa..3fec12c 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -42,6 +42,7 @@
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_timestamp.h>
#ifdef CONFIG_NF_NAT_NEEDED
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_protocol.h>
@@ -230,6 +231,33 @@
return -1;
}
+static int
+ctnetlink_dump_timestamp(struct sk_buff *skb, const struct nf_conn *ct)
+{
+ struct nlattr *nest_count;
+ const struct nf_conn_tstamp *tstamp;
+
+ tstamp = nf_conn_tstamp_find(ct);
+ if (!tstamp)
+ return 0;
+
+ nest_count = nla_nest_start(skb, CTA_TIMESTAMP | NLA_F_NESTED);
+ if (!nest_count)
+ goto nla_put_failure;
+
+ NLA_PUT_BE64(skb, CTA_TIMESTAMP_START, cpu_to_be64(tstamp->start));
+ if (tstamp->stop != 0) {
+ NLA_PUT_BE64(skb, CTA_TIMESTAMP_STOP,
+ cpu_to_be64(tstamp->stop));
+ }
+ nla_nest_end(skb, nest_count);
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
#ifdef CONFIG_NF_CONNTRACK_MARK
static inline int
ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct)
@@ -404,6 +432,7 @@
ctnetlink_dump_timeout(skb, ct) < 0 ||
ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
+ ctnetlink_dump_timestamp(skb, ct) < 0 ||
ctnetlink_dump_protoinfo(skb, ct) < 0 ||
ctnetlink_dump_helpinfo(skb, ct) < 0 ||
ctnetlink_dump_mark(skb, ct) < 0 ||
@@ -471,6 +500,18 @@
}
static inline size_t
+ctnetlink_timestamp_size(const struct nf_conn *ct)
+{
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+ if (!nf_ct_ext_exist(ct, NF_CT_EXT_TSTAMP))
+ return 0;
+ return nla_total_size(0) + 2 * nla_total_size(sizeof(uint64_t));
+#else
+ return 0;
+#endif
+}
+
+static inline size_t
ctnetlink_nlmsg_size(const struct nf_conn *ct)
{
return NLMSG_ALIGN(sizeof(struct nfgenmsg))
@@ -481,6 +522,7 @@
+ nla_total_size(sizeof(u_int32_t)) /* CTA_ID */
+ nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */
+ ctnetlink_counters_size(ct)
+ + ctnetlink_timestamp_size(ct)
+ nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */
+ nla_total_size(0) /* CTA_PROTOINFO */
+ nla_total_size(0) /* CTA_HELP */
@@ -571,7 +613,8 @@
if (events & (1 << IPCT_DESTROY)) {
if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
- ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
+ ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
+ ctnetlink_dump_timestamp(skb, ct) < 0)
goto nla_put_failure;
} else {
if (ctnetlink_dump_timeout(skb, ct) < 0)
@@ -1357,6 +1400,7 @@
}
nf_ct_acct_ext_add(ct, GFP_ATOMIC);
+ nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC);
/* we must add conntrack extensions before confirmation. */
ct->status |= IPS_CONFIRMED;
@@ -1375,6 +1419,7 @@
}
#endif
+ memset(&ct->proto, 0, sizeof(ct->proto));
if (cda[CTA_PROTOINFO]) {
err = ctnetlink_change_protoinfo(ct, cda);
if (err < 0)
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index dc7bb74..5701c8d 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -166,6 +166,7 @@
int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
{
int ret = 0;
+ struct nf_conntrack_l3proto *old;
if (proto->l3proto >= AF_MAX)
return -EBUSY;
@@ -174,7 +175,9 @@
return -EINVAL;
mutex_lock(&nf_ct_proto_mutex);
- if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_l3proto_generic) {
+ old = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
+ lockdep_is_held(&nf_ct_proto_mutex));
+ if (old != &nf_conntrack_l3proto_generic) {
ret = -EBUSY;
goto out_unlock;
}
@@ -201,7 +204,9 @@
BUG_ON(proto->l3proto >= AF_MAX);
mutex_lock(&nf_ct_proto_mutex);
- BUG_ON(nf_ct_l3protos[proto->l3proto] != proto);
+ BUG_ON(rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
+ lockdep_is_held(&nf_ct_proto_mutex)
+ ) != proto);
rcu_assign_pointer(nf_ct_l3protos[proto->l3proto],
&nf_conntrack_l3proto_generic);
nf_ct_l3proto_unregister_sysctl(proto);
@@ -279,7 +284,7 @@
mutex_lock(&nf_ct_proto_mutex);
if (!nf_ct_protos[l4proto->l3proto]) {
/* l3proto may be loaded later. */
- struct nf_conntrack_l4proto **proto_array;
+ struct nf_conntrack_l4proto __rcu **proto_array;
int i;
proto_array = kmalloc(MAX_NF_CT_PROTO *
@@ -291,7 +296,7 @@
}
for (i = 0; i < MAX_NF_CT_PROTO; i++)
- proto_array[i] = &nf_conntrack_l4proto_generic;
+ RCU_INIT_POINTER(proto_array[i], &nf_conntrack_l4proto_generic);
/* Before making proto_array visible to lockless readers,
* we must make sure its content is committed to memory.
@@ -299,8 +304,10 @@
smp_wmb();
nf_ct_protos[l4proto->l3proto] = proto_array;
- } else if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto] !=
- &nf_conntrack_l4proto_generic) {
+ } else if (rcu_dereference_protected(
+ nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
+ lockdep_is_held(&nf_ct_proto_mutex)
+ ) != &nf_conntrack_l4proto_generic) {
ret = -EBUSY;
goto out_unlock;
}
@@ -331,7 +338,10 @@
BUG_ON(l4proto->l3proto >= PF_MAX);
mutex_lock(&nf_ct_proto_mutex);
- BUG_ON(nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != l4proto);
+ BUG_ON(rcu_dereference_protected(
+ nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
+ lockdep_is_held(&nf_ct_proto_mutex)
+ ) != l4proto);
rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
&nf_conntrack_l4proto_generic);
nf_ct_l4proto_unregister_sysctl(l4proto);
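Every conversion in this file is the same idiom: the protocol tables are modified under nf_ct_proto_mutex but read locklessly under RCU, so update-side loads are spelled rcu_dereference_protected() with a lockdep condition rather than a plain pointer read, letting both sparse and lockdep verify the access. A minimal sketch of the pattern, kernel context assumed; foo, gp, g_mutex and replace_foo are invented names:

#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo { int val; };

static struct foo __rcu *gp;
static DEFINE_MUTEX(g_mutex);

static void replace_foo(struct foo *newp)
{
        struct foo *old;

        mutex_lock(&g_mutex);
        /* holding g_mutex makes this read safe; say so explicitly */
        old = rcu_dereference_protected(gp, lockdep_is_held(&g_mutex));
        rcu_assign_pointer(gp, newp);
        mutex_unlock(&g_mutex);

        if (old) {
                synchronize_rcu();      /* wait out lockless readers */
                kfree(old);
        }
}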
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 5292560..9ae57c5 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -452,6 +452,9 @@
ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT;
ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER;
ct->proto.dccp.state = CT_DCCP_NONE;
+ ct->proto.dccp.last_pkt = DCCP_PKT_REQUEST;
+ ct->proto.dccp.last_dir = IP_CT_DIR_ORIGINAL;
+ ct->proto.dccp.handshake_seq = 0;
return true;
out_invalid:
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index c6049c2..6f4ee70 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -413,6 +413,7 @@
test_bit(SCTP_CID_COOKIE_ACK, map))
return false;
+ memset(&ct->proto.sctp, 0, sizeof(ct->proto.sctp));
new_state = SCTP_CONNTRACK_MAX;
for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
/* Don't need lock here: this conntrack not in circulation yet */
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 3fb2b73..6f38d0e 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1066,9 +1066,7 @@
BUG_ON(th == NULL);
/* Don't need lock here: this conntrack not in circulation yet */
- new_state
- = tcp_conntracks[0][get_conntrack_index(th)]
- [TCP_CONNTRACK_NONE];
+ new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
/* Invalid: delete conntrack */
if (new_state >= TCP_CONNTRACK_MAX) {
@@ -1077,6 +1075,7 @@
}
if (new_state == TCP_CONNTRACK_SYN_SENT) {
+ memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
/* SYN packet */
ct->proto.tcp.seen[0].td_end =
segment_seq_plus_len(ntohl(th->seq), skb->len,
@@ -1088,11 +1087,11 @@
ct->proto.tcp.seen[0].td_end;
tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
- ct->proto.tcp.seen[1].flags = 0;
} else if (nf_ct_tcp_loose == 0) {
/* Don't try to pick up connections. */
return false;
} else {
+ memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
/*
* We are in the middle of a connection,
* its history is lost for us.
@@ -1107,7 +1106,6 @@
ct->proto.tcp.seen[0].td_maxend =
ct->proto.tcp.seen[0].td_end +
ct->proto.tcp.seen[0].td_maxwin;
- ct->proto.tcp.seen[0].td_scale = 0;
/* We assume SACK and liberal window checking to handle
* window scaling */
@@ -1116,13 +1114,7 @@
IP_CT_TCP_FLAG_BE_LIBERAL;
}
- ct->proto.tcp.seen[1].td_end = 0;
- ct->proto.tcp.seen[1].td_maxend = 0;
- ct->proto.tcp.seen[1].td_maxwin = 0;
- ct->proto.tcp.seen[1].td_scale = 0;
-
/* tcp_packet will set them */
- ct->proto.tcp.state = TCP_CONNTRACK_NONE;
ct->proto.tcp.last_index = TCP_NONE_SET;
pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
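The DCCP, SCTP and TCP hunks above all serve one purpose: ct->proto is a union shared by every L4 tracker, so each tracker's new() must wipe the whole thing before use instead of zeroing individual fields (the ctnetlink create path gains the same memset). A standalone model of the stale-bytes problem this prevents; the union members are invented stand-ins:

#include <stdio.h>
#include <string.h>

union proto_state {
        struct { unsigned int state, handshake_seq; } dccp;
        struct { unsigned int seen0_flags, seen1_flags; } tcp;
};

int main(void)
{
        union proto_state proto;

        /* previous life of this entry as a DCCP flow */
        proto.dccp.state = 7;
        proto.dccp.handshake_seq = 0xdeadbeef;

        /* reused for TCP without clearing: stale bytes show through */
        printf("without memset: seen1_flags=%#x\n", proto.tcp.seen1_flags);

        memset(&proto, 0, sizeof(proto));
        printf("with memset:    seen1_flags=%#x\n", proto.tcp.seen1_flags);
        return 0;
}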
diff --git a/net/netfilter/nf_conntrack_snmp.c b/net/netfilter/nf_conntrack_snmp.c
new file mode 100644
index 0000000..6e545e2
--- /dev/null
+++ b/net/netfilter/nf_conntrack_snmp.c
@@ -0,0 +1,77 @@
+/*
+ * SNMP service broadcast connection tracking helper
+ *
+ * (c) 2011 Jiri Olsa <jolsa@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/in.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+
+#define SNMP_PORT 161
+
+MODULE_AUTHOR("Jiri Olsa <jolsa@redhat.com>");
+MODULE_DESCRIPTION("SNMP service broadcast connection tracking helper");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NFCT_HELPER("snmp");
+
+static unsigned int timeout __read_mostly = 30;
+module_param(timeout, uint, S_IRUSR);
+MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
+
+int (*nf_nat_snmp_hook)(struct sk_buff *skb,
+ unsigned int protoff,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo);
+EXPORT_SYMBOL_GPL(nf_nat_snmp_hook);
+
+static int snmp_conntrack_help(struct sk_buff *skb, unsigned int protoff,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+{
+ typeof(nf_nat_snmp_hook) nf_nat_snmp;
+
+ nf_conntrack_broadcast_help(skb, protoff, ct, ctinfo, timeout);
+
+ nf_nat_snmp = rcu_dereference(nf_nat_snmp_hook);
+ if (nf_nat_snmp && ct->status & IPS_NAT_MASK)
+ return nf_nat_snmp(skb, protoff, ct, ctinfo);
+
+ return NF_ACCEPT;
+}
+
+static struct nf_conntrack_expect_policy exp_policy = {
+ .max_expected = 1,
+};
+
+static struct nf_conntrack_helper helper __read_mostly = {
+ .name = "snmp",
+ .tuple.src.l3num = NFPROTO_IPV4,
+ .tuple.src.u.udp.port = cpu_to_be16(SNMP_PORT),
+ .tuple.dst.protonum = IPPROTO_UDP,
+ .me = THIS_MODULE,
+ .help = snmp_conntrack_help,
+ .expect_policy = &exp_policy,
+};
+
+static int __init nf_conntrack_snmp_init(void)
+{
+ exp_policy.timeout = timeout;
+ return nf_conntrack_helper_register(&helper);
+}
+
+static void __exit nf_conntrack_snmp_fini(void)
+{
+ nf_conntrack_helper_unregister(&helper);
+}
+
+module_init(nf_conntrack_snmp_init);
+module_exit(nf_conntrack_snmp_fini);
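The nf_nat_snmp_hook exported above is meant to be filled in at runtime by a NAT helper. A hypothetical sketch of that NAT side, kernel context assumed; my_snmp_mangle, my_init and my_exit are invented names and the extern is restated locally:

#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <net/netfilter/nf_conntrack.h>

extern int (*nf_nat_snmp_hook)(struct sk_buff *skb, unsigned int protoff,
                               struct nf_conn *ct,
                               enum ip_conntrack_info ctinfo);

static int my_snmp_mangle(struct sk_buff *skb, unsigned int protoff,
                          struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
        /* rewrite the agent address inside the SNMP payload here */
        return NF_ACCEPT;
}

static int __init my_init(void)
{
        rcu_assign_pointer(nf_nat_snmp_hook, my_snmp_mangle);
        return 0;
}

static void __exit my_exit(void)
{
        rcu_assign_pointer(nf_nat_snmp_hook, NULL);
        synchronize_rcu();      /* let in-flight helper calls finish */
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");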
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index b4d7f0f..0ae1428 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -29,6 +29,8 @@
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_timestamp.h>
+#include <linux/rculist_nulls.h>
MODULE_LICENSE("GPL");
@@ -45,6 +47,7 @@
struct ct_iter_state {
struct seq_net_private p;
unsigned int bucket;
+ u_int64_t time_now;
};
static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
@@ -56,7 +59,7 @@
for (st->bucket = 0;
st->bucket < net->ct.htable_size;
st->bucket++) {
- n = rcu_dereference(net->ct.hash[st->bucket].first);
+ n = rcu_dereference(hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
if (!is_a_nulls(n))
return n;
}
@@ -69,13 +72,15 @@
struct net *net = seq_file_net(seq);
struct ct_iter_state *st = seq->private;
- head = rcu_dereference(head->next);
+ head = rcu_dereference(hlist_nulls_next_rcu(head));
while (is_a_nulls(head)) {
if (likely(get_nulls_value(head) == st->bucket)) {
if (++st->bucket >= net->ct.htable_size)
return NULL;
}
- head = rcu_dereference(net->ct.hash[st->bucket].first);
+ head = rcu_dereference(
+ hlist_nulls_first_rcu(
+ &net->ct.hash[st->bucket]));
}
return head;
}
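The switch to hlist_nulls_first_rcu()/hlist_nulls_next_rcu() is mostly annotation plumbing, but the bucket test in the loop is the part worth understanding: every nulls chain ends in a marker that encodes its bucket number, so a lockless reader whose node was rehashed onto another chain sees a mismatched marker and re-reads its own bucket rather than silently skipping entries. A standalone model of that convention; the node layout and helpers are simplified stand-ins:

#include <stdio.h>
#include <stdint.h>

struct node { struct node *next; int key; };

static int is_a_nulls(const struct node *p)
{ return (uintptr_t)p & 1; }
static unsigned int get_nulls_value(const struct node *p)
{ return (uintptr_t)p >> 1; }
static struct node *make_nulls(unsigned int bucket)
{ return (struct node *)(((uintptr_t)bucket << 1) | 1); }

int main(void)
{
        unsigned int bucket = 2;
        struct node *end_ok    = make_nulls(2); /* chain ended where it began */
        struct node *end_moved = make_nulls(3); /* reader rehashed mid-walk */

        if (is_a_nulls(end_ok) && get_nulls_value(end_ok) == bucket)
                printf("bucket %u finished, advance to %u\n",
                       bucket, bucket + 1);
        if (is_a_nulls(end_moved) && get_nulls_value(end_moved) != bucket)
                printf("marker says chain %u, expected %u: re-read bucket %u\n",
                       get_nulls_value(end_moved), bucket, bucket);
        return 0;
}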
@@ -93,6 +98,9 @@
static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(RCU)
{
+ struct ct_iter_state *st = seq->private;
+
+ st->time_now = ktime_to_ns(ktime_get_real());
rcu_read_lock();
return ct_get_idx(seq, *pos);
}
@@ -132,6 +140,34 @@
}
#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+static int ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
+{
+ struct ct_iter_state *st = s->private;
+ struct nf_conn_tstamp *tstamp;
+ s64 delta_time;
+
+ tstamp = nf_conn_tstamp_find(ct);
+ if (tstamp) {
+ delta_time = st->time_now - tstamp->start;
+ if (delta_time > 0)
+ delta_time = div_s64(delta_time, NSEC_PER_SEC);
+ else
+ delta_time = 0;
+
+ return seq_printf(s, "delta-time=%llu ",
+ (unsigned long long)delta_time);
+ }
+ return 0;
+}
+#else
+static inline int
+ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
+{
+ return 0;
+}
+#endif
+
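ct_show_delta_time() pairs with the single time_now sample taken in ct_seq_start(): every row of one /proc read is aged against the same wall-clock instant, and a negative difference (the clock stepped backwards) clamps to zero. The arithmetic restated standalone with made-up timestamps:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t time_now = 1300000042500000000ULL; /* ns, sampled once per read */
        uint64_t start    = 1300000000123456789ULL; /* tstamp->start of the flow */
        int64_t delta = (int64_t)(time_now - start);

        if (delta < 0)
                delta = 0;      /* wall clock stepped backwards */
        printf("delta-time=%llu\n",
               (unsigned long long)(delta / 1000000000LL)); /* 42 */
        return 0;
}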
/* return 0 on success, 1 in case of error */
static int ct_seq_show(struct seq_file *s, void *v)
{
@@ -200,6 +236,9 @@
goto release;
#endif
+ if (ct_show_delta_time(s, ct))
+ goto release;
+
if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)))
goto release;
diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c
new file mode 100644
index 0000000..af7dd31
--- /dev/null
+++ b/net/netfilter/nf_conntrack_timestamp.c
@@ -0,0 +1,120 @@
+/*
+ * (C) 2010 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation (or, at your option, any later version).
+ */
+
+#include <linux/netfilter.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/moduleparam.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_conntrack_timestamp.h>
+
+static int nf_ct_tstamp __read_mostly;
+
+module_param_named(tstamp, nf_ct_tstamp, bool, 0644);
+MODULE_PARM_DESC(tstamp, "Enable connection tracking flow timestamping.");
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table tstamp_sysctl_table[] = {
+ {
+ .procname = "nf_conntrack_timestamp",
+ .data = &init_net.ct.sysctl_tstamp,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {}
+};
+#endif /* CONFIG_SYSCTL */
+
+static struct nf_ct_ext_type tstamp_extend __read_mostly = {
+ .len = sizeof(struct nf_conn_tstamp),
+ .align = __alignof__(struct nf_conn_tstamp),
+ .id = NF_CT_EXT_TSTAMP,
+};
+
+#ifdef CONFIG_SYSCTL
+static int nf_conntrack_tstamp_init_sysctl(struct net *net)
+{
+ struct ctl_table *table;
+
+ table = kmemdup(tstamp_sysctl_table, sizeof(tstamp_sysctl_table),
+ GFP_KERNEL);
+ if (!table)
+ goto out;
+
+ table[0].data = &net->ct.sysctl_tstamp;
+
+ net->ct.tstamp_sysctl_header = register_net_sysctl_table(net,
+ nf_net_netfilter_sysctl_path, table);
+ if (!net->ct.tstamp_sysctl_header) {
+ printk(KERN_ERR "nf_ct_tstamp: can't register to sysctl.\n");
+ goto out_register;
+ }
+ return 0;
+
+out_register:
+ kfree(table);
+out:
+ return -ENOMEM;
+}
+
+static void nf_conntrack_tstamp_fini_sysctl(struct net *net)
+{
+ struct ctl_table *table;
+
+ table = net->ct.tstamp_sysctl_header->ctl_table_arg;
+ unregister_net_sysctl_table(net->ct.tstamp_sysctl_header);
+ kfree(table);
+}
+#else
+static int nf_conntrack_tstamp_init_sysctl(struct net *net)
+{
+ return 0;
+}
+
+static void nf_conntrack_tstamp_fini_sysctl(struct net *net)
+{
+}
+#endif
+
+int nf_conntrack_tstamp_init(struct net *net)
+{
+ int ret;
+
+ net->ct.sysctl_tstamp = nf_ct_tstamp;
+
+ if (net_eq(net, &init_net)) {
+ ret = nf_ct_extend_register(&tstamp_extend);
+ if (ret < 0) {
+ printk(KERN_ERR "nf_ct_tstamp: Unable to register "
+ "extension\n");
+ goto out_extend_register;
+ }
+ }
+
+ ret = nf_conntrack_tstamp_init_sysctl(net);
+ if (ret < 0)
+ goto out_sysctl;
+
+ return 0;
+
+out_sysctl:
+ if (net_eq(net, &init_net))
+ nf_ct_extend_unregister(&tstamp_extend);
+out_extend_register:
+ return ret;
+}
+
+void nf_conntrack_tstamp_fini(struct net *net)
+{
+ nf_conntrack_tstamp_fini_sysctl(net);
+ if (net_eq(net, &init_net))
+ nf_ct_extend_unregister(&tstamp_extend);
+}
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index b07393e..20c775c 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -161,7 +161,8 @@
struct nf_logger *t;
int ret;
- logger = nf_loggers[*pos];
+ logger = rcu_dereference_protected(nf_loggers[*pos],
+ lockdep_is_held(&nf_log_mutex));
if (!logger)
ret = seq_printf(s, "%2lld NONE (", *pos);
@@ -249,7 +250,8 @@
mutex_unlock(&nf_log_mutex);
} else {
mutex_lock(&nf_log_mutex);
- logger = nf_loggers[tindex];
+ logger = rcu_dereference_protected(nf_loggers[tindex],
+ lockdep_is_held(&nf_log_mutex));
if (!logger)
table->data = "NONE";
else
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 74aebed..5ab22e2 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -27,14 +27,17 @@
int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
{
int ret;
+ const struct nf_queue_handler *old;
if (pf >= ARRAY_SIZE(queue_handler))
return -EINVAL;
mutex_lock(&queue_handler_mutex);
- if (queue_handler[pf] == qh)
+ old = rcu_dereference_protected(queue_handler[pf],
+ lockdep_is_held(&queue_handler_mutex));
+ if (old == qh)
ret = -EEXIST;
- else if (queue_handler[pf])
+ else if (old)
ret = -EBUSY;
else {
rcu_assign_pointer(queue_handler[pf], qh);
@@ -49,11 +52,15 @@
/* The caller must flush their queue before this */
int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
{
+ const struct nf_queue_handler *old;
+
if (pf >= ARRAY_SIZE(queue_handler))
return -EINVAL;
mutex_lock(&queue_handler_mutex);
- if (queue_handler[pf] && queue_handler[pf] != qh) {
+ old = rcu_dereference_protected(queue_handler[pf],
+ lockdep_is_held(&queue_handler_mutex));
+ if (old && old != qh) {
mutex_unlock(&queue_handler_mutex);
return -EINVAL;
}
@@ -73,7 +80,10 @@
mutex_lock(&queue_handler_mutex);
for (pf = 0; pf < ARRAY_SIZE(queue_handler); pf++) {
- if (queue_handler[pf] == qh)
+ if (rcu_dereference_protected(
+ queue_handler[pf],
+ lockdep_is_held(&queue_handler_mutex)
+ ) == qh)
rcu_assign_pointer(queue_handler[pf], NULL);
}
mutex_unlock(&queue_handler_mutex);
@@ -115,7 +125,7 @@
int (*okfn)(struct sk_buff *),
unsigned int queuenum)
{
- int status;
+ int status = -ENOENT;
struct nf_queue_entry *entry = NULL;
#ifdef CONFIG_BRIDGE_NETFILTER
struct net_device *physindev;
@@ -128,16 +138,20 @@
rcu_read_lock();
qh = rcu_dereference(queue_handler[pf]);
- if (!qh)
+ if (!qh) {
+ status = -ESRCH;
goto err_unlock;
+ }
afinfo = nf_get_afinfo(pf);
if (!afinfo)
goto err_unlock;
entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC);
- if (!entry)
+ if (!entry) {
+ status = -ENOMEM;
goto err_unlock;
+ }
*entry = (struct nf_queue_entry) {
.skb = skb,
@@ -151,11 +165,9 @@
/* If it's going away, ignore hook. */
if (!try_module_get(entry->elem->owner)) {
- rcu_read_unlock();
- kfree(entry);
- return 0;
+ status = -ECANCELED;
+ goto err_unlock;
}
-
/* Bump dev refs so they don't vanish while packet is out */
if (indev)
dev_hold(indev);
@@ -182,14 +194,13 @@
goto err;
}
- return 1;
+ return 0;
err_unlock:
rcu_read_unlock();
err:
- kfree_skb(skb);
kfree(entry);
- return 1;
+ return status;
}
int nf_queue(struct sk_buff *skb,
@@ -201,6 +212,8 @@
unsigned int queuenum)
{
struct sk_buff *segs;
+ int err;
+ unsigned int queued;
if (!skb_is_gso(skb))
return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
@@ -216,20 +229,35 @@
}
segs = skb_gso_segment(skb, 0);
- kfree_skb(skb);
+ /* Does not use PTR_ERR to limit the number of error codes that can be
+ * returned by nf_queue. For instance, callers rely on -ECANCELED to mean
+ * 'ignore this hook'.
+ */
if (IS_ERR(segs))
- return 1;
+ return -EINVAL;
+ queued = 0;
+ err = 0;
do {
struct sk_buff *nskb = segs->next;
segs->next = NULL;
- if (!__nf_queue(segs, elem, pf, hook, indev, outdev, okfn,
- queuenum))
+ if (err == 0)
+ err = __nf_queue(segs, elem, pf, hook, indev,
+ outdev, okfn, queuenum);
+ if (err == 0)
+ queued++;
+ else
kfree_skb(segs);
segs = nskb;
} while (segs);
- return 1;
+
+ /* also free the original skb if at least some segments were queued */
+ if (unlikely(err && queued))
+ err = 0;
+ if (err == 0)
+ kfree_skb(skb);
+ return err;
}
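The GSO path above has deliberately asymmetric semantics: segments are queued one at a time, the first failure makes every remaining segment get freed, and if at least one segment was queued the original skb is consumed and 0 returned so the caller neither frees nor reinjects it; only a total failure propagates the error. A standalone model of that control flow; fake_enqueue and its error value are stand-ins:

#include <stdio.h>

static int fake_enqueue(int seg, int fail_from)
{
        return seg >= fail_from ? -1 /* stand-in error */ : 0;
}

static int queue_gso(int nsegs, int fail_from)
{
        int err = 0, seg;
        unsigned int queued = 0;

        for (seg = 0; seg < nsegs; seg++) {
                if (err == 0)
                        err = fake_enqueue(seg, fail_from);
                if (err == 0)
                        queued++;
                /* else: this segment would be kfree_skb()ed */
        }
        if (err && queued)      /* partial success still consumes the packet */
                err = 0;
        return err;
}

int main(void)
{
        printf("all queued:  %d\n", queue_gso(4, 99)); /* 0  */
        printf("partial:     %d\n", queue_gso(4, 1));  /* 0  */
        printf("none queued: %d\n", queue_gso(4, 0));  /* -1 */
        return 0;
}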
void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
@@ -237,6 +265,7 @@
struct sk_buff *skb = entry->skb;
struct list_head *elem = &entry->elem->list;
const struct nf_afinfo *afinfo;
+ int err;
rcu_read_lock();
@@ -270,10 +299,17 @@
local_bh_enable();
break;
case NF_QUEUE:
- if (!__nf_queue(skb, elem, entry->pf, entry->hook,
- entry->indev, entry->outdev, entry->okfn,
- verdict >> NF_VERDICT_BITS))
- goto next_hook;
+ err = __nf_queue(skb, elem, entry->pf, entry->hook,
+ entry->indev, entry->outdev, entry->okfn,
+ verdict >> NF_VERDICT_QBITS);
+ if (err < 0) {
+ if (err == -ECANCELED)
+ goto next_hook;
+ if (err == -ESRCH &&
+ (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
+ goto next_hook;
+ kfree_skb(skb);
+ }
break;
case NF_STOLEN:
default:
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 6a1572b..91592da 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -874,19 +874,19 @@
for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
if (!hlist_empty(&instance_table[st->bucket]))
- return rcu_dereference_bh(instance_table[st->bucket].first);
+ return rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket]));
}
return NULL;
}
static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h)
{
- h = rcu_dereference_bh(h->next);
+ h = rcu_dereference_bh(hlist_next_rcu(h));
while (!h) {
if (++st->bucket >= INSTANCE_BUCKETS)
return NULL;
- h = rcu_dereference_bh(instance_table[st->bucket].first);
+ h = rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket]));
}
return h;
}
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 68e67d1..b83123f 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -387,25 +387,31 @@
{
struct sk_buff *nskb;
struct nfqnl_instance *queue;
- int err;
+ int err = -ENOBUFS;
/* rcu_read_lock()ed by nf_hook_slow() */
queue = instance_lookup(queuenum);
- if (!queue)
+ if (!queue) {
+ err = -ESRCH;
goto err_out;
+ }
- if (queue->copy_mode == NFQNL_COPY_NONE)
+ if (queue->copy_mode == NFQNL_COPY_NONE) {
+ err = -EINVAL;
goto err_out;
+ }
nskb = nfqnl_build_packet_message(queue, entry);
- if (nskb == NULL)
+ if (nskb == NULL) {
+ err = -ENOMEM;
goto err_out;
-
+ }
spin_lock_bh(&queue->lock);
- if (!queue->peer_pid)
+ if (!queue->peer_pid) {
+ err = -EINVAL;
goto err_out_free_nskb;
-
+ }
if (queue->queue_total >= queue->queue_maxlen) {
queue->queue_dropped++;
if (net_ratelimit())
@@ -432,7 +438,7 @@
err_out_unlock:
spin_unlock_bh(&queue->lock);
err_out:
- return -1;
+ return err;
}
static int
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index c942376..0a77d2f 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -23,6 +23,7 @@
#include <linux/mutex.h>
#include <linux/mm.h>
#include <linux/slab.h>
+#include <linux/audit.h>
#include <net/net_namespace.h>
#include <linux/netfilter/x_tables.h>
@@ -38,9 +39,8 @@
#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
struct compat_delta {
- struct compat_delta *next;
- unsigned int offset;
- int delta;
+ unsigned int offset; /* offset in kernel */
+ int delta; /* delta in 32bit user land */
};
struct xt_af {
@@ -49,7 +49,9 @@
struct list_head target;
#ifdef CONFIG_COMPAT
struct mutex compat_mutex;
- struct compat_delta *compat_offsets;
+ struct compat_delta *compat_tab;
+ unsigned int number; /* number of slots in compat_tab[] */
+ unsigned int cur; /* number of used slots in compat_tab[] */
#endif
};
@@ -414,54 +416,67 @@
EXPORT_SYMBOL_GPL(xt_check_match);
#ifdef CONFIG_COMPAT
-int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta)
+int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta)
{
- struct compat_delta *tmp;
+ struct xt_af *xp = &xt[af];
- tmp = kmalloc(sizeof(struct compat_delta), GFP_KERNEL);
- if (!tmp)
- return -ENOMEM;
-
- tmp->offset = offset;
- tmp->delta = delta;
-
- if (xt[af].compat_offsets) {
- tmp->next = xt[af].compat_offsets->next;
- xt[af].compat_offsets->next = tmp;
- } else {
- xt[af].compat_offsets = tmp;
- tmp->next = NULL;
+ if (!xp->compat_tab) {
+ if (!xp->number)
+ return -EINVAL;
+ xp->compat_tab = vmalloc(sizeof(struct compat_delta) * xp->number);
+ if (!xp->compat_tab)
+ return -ENOMEM;
+ xp->cur = 0;
}
+
+ if (xp->cur >= xp->number)
+ return -EINVAL;
+
+ if (xp->cur)
+ delta += xp->compat_tab[xp->cur - 1].delta;
+ xp->compat_tab[xp->cur].offset = offset;
+ xp->compat_tab[xp->cur].delta = delta;
+ xp->cur++;
return 0;
}
EXPORT_SYMBOL_GPL(xt_compat_add_offset);
void xt_compat_flush_offsets(u_int8_t af)
{
- struct compat_delta *tmp, *next;
-
- if (xt[af].compat_offsets) {
- for (tmp = xt[af].compat_offsets; tmp; tmp = next) {
- next = tmp->next;
- kfree(tmp);
- }
- xt[af].compat_offsets = NULL;
+ if (xt[af].compat_tab) {
+ vfree(xt[af].compat_tab);
+ xt[af].compat_tab = NULL;
+ xt[af].number = 0;
}
}
EXPORT_SYMBOL_GPL(xt_compat_flush_offsets);
int xt_compat_calc_jump(u_int8_t af, unsigned int offset)
{
- struct compat_delta *tmp;
- int delta;
+ struct compat_delta *tmp = xt[af].compat_tab;
+ int mid, left = 0, right = xt[af].cur - 1;
- for (tmp = xt[af].compat_offsets, delta = 0; tmp; tmp = tmp->next)
- if (tmp->offset < offset)
- delta += tmp->delta;
- return delta;
+ while (left <= right) {
+ mid = (left + right) >> 1;
+ if (offset > tmp[mid].offset)
+ left = mid + 1;
+ else if (offset < tmp[mid].offset)
+ right = mid - 1;
+ else
+ return mid ? tmp[mid - 1].delta : 0;
+ }
+ WARN_ON_ONCE(1);
+ return 0;
}
EXPORT_SYMBOL_GPL(xt_compat_calc_jump);
+void xt_compat_init_offsets(u_int8_t af, unsigned int number)
+{
+ xt[af].number = number;
+ xt[af].cur = 0;
+}
+EXPORT_SYMBOL(xt_compat_init_offsets);
+
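The rework above replaces an O(n) list walk per lookup with a flat array of cumulative deltas and a binary search: offsets arrive in ascending order, each slot stores the running total, and xt_compat_calc_jump() returns the total accumulated strictly before the queried offset. A standalone restatement with example numbers:

#include <stdio.h>

struct compat_delta { unsigned int offset; int delta; };

static struct compat_delta tab[16];
static unsigned int cur;

static void add_offset(unsigned int offset, int delta)
{
        if (cur)
                delta += tab[cur - 1].delta;    /* store the running total */
        tab[cur].offset = offset;
        tab[cur].delta = delta;
        cur++;
}

static int calc_jump(unsigned int offset)
{
        int left = 0, right = cur - 1;

        while (left <= right) {
                int mid = (left + right) >> 1;

                if (offset > tab[mid].offset)
                        left = mid + 1;
                else if (offset < tab[mid].offset)
                        right = mid - 1;
                else    /* delta accumulated before this entry */
                        return mid ? tab[mid - 1].delta : 0;
        }
        return 0;       /* the kernel WARNs here: the offset must exist */
}

int main(void)
{
        add_offset(100, 8);     /* entry at kernel offset 100 shrinks by 8 */
        add_offset(200, 4);
        add_offset(300, 8);
        printf("jump@100 = %d\n", calc_jump(100));      /* 0  */
        printf("jump@200 = %d\n", calc_jump(200));      /* 8  */
        printf("jump@300 = %d\n", calc_jump(300));      /* 12 */
        return 0;
}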
int xt_compat_match_offset(const struct xt_match *match)
{
u_int16_t csize = match->compatsize ? : match->matchsize;
@@ -820,6 +835,21 @@
*/
local_bh_enable();
+#ifdef CONFIG_AUDIT
+ if (audit_enabled) {
+ struct audit_buffer *ab;
+
+ ab = audit_log_start(current->audit_context, GFP_KERNEL,
+ AUDIT_NETFILTER_CFG);
+ if (ab) {
+ audit_log_format(ab, "table=%s family=%u entries=%u",
+ table->name, table->af,
+ private->number);
+ audit_log_end(ab);
+ }
+ }
+#endif
+
return private;
}
EXPORT_SYMBOL_GPL(xt_replace_table);
@@ -1338,7 +1368,7 @@
mutex_init(&xt[i].mutex);
#ifdef CONFIG_COMPAT
mutex_init(&xt[i].compat_mutex);
- xt[i].compat_offsets = NULL;
+ xt[i].compat_tab = NULL;
#endif
INIT_LIST_HEAD(&xt[i].target);
INIT_LIST_HEAD(&xt[i].match);
diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c
new file mode 100644
index 0000000..81802d2
--- /dev/null
+++ b/net/netfilter/xt_AUDIT.c
@@ -0,0 +1,204 @@
+/*
+ * Creates audit record for dropped/accepted packets
+ *
+ * (C) 2010-2011 Thomas Graf <tgraf@redhat.com>
+ * (C) 2010-2011 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/audit.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/if_arp.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_AUDIT.h>
+#include <net/ipv6.h>
+#include <net/ip.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Thomas Graf <tgraf@redhat.com>");
+MODULE_DESCRIPTION("Xtables: creates audit records for dropped/accepted packets");
+MODULE_ALIAS("ipt_AUDIT");
+MODULE_ALIAS("ip6t_AUDIT");
+MODULE_ALIAS("ebt_AUDIT");
+MODULE_ALIAS("arpt_AUDIT");
+
+static void audit_proto(struct audit_buffer *ab, struct sk_buff *skb,
+ unsigned int proto, unsigned int offset)
+{
+ switch (proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE: {
+ const __be16 *pptr;
+ __be16 _ports[2];
+
+ pptr = skb_header_pointer(skb, offset, sizeof(_ports), _ports);
+ if (pptr == NULL) {
+ audit_log_format(ab, " truncated=1");
+ return;
+ }
+
+ audit_log_format(ab, " sport=%hu dport=%hu",
+ ntohs(pptr[0]), ntohs(pptr[1]));
+ }
+ break;
+
+ case IPPROTO_ICMP:
+ case IPPROTO_ICMPV6: {
+ const u8 *iptr;
+ u8 _ih[2];
+
+ iptr = skb_header_pointer(skb, offset, sizeof(_ih), &_ih);
+ if (iptr == NULL) {
+ audit_log_format(ab, " truncated=1");
+ return;
+ }
+
+ audit_log_format(ab, " icmptype=%hhu icmpcode=%hhu",
+ iptr[0], iptr[1]);
+
+ }
+ break;
+ }
+}
+
+static void audit_ip4(struct audit_buffer *ab, struct sk_buff *skb)
+{
+ struct iphdr _iph;
+ const struct iphdr *ih;
+
+ ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
+ if (!ih) {
+ audit_log_format(ab, " truncated=1");
+ return;
+ }
+
+ audit_log_format(ab, " saddr=%pI4 daddr=%pI4 ipid=%hu proto=%hhu",
+ &ih->saddr, &ih->daddr, ntohs(ih->id), ih->protocol);
+
+ if (ntohs(ih->frag_off) & IP_OFFSET) {
+ audit_log_format(ab, " frag=1");
+ return;
+ }
+
+ audit_proto(ab, skb, ih->protocol, ih->ihl * 4);
+}
+
+static void audit_ip6(struct audit_buffer *ab, struct sk_buff *skb)
+{
+ struct ipv6hdr _ip6h;
+ const struct ipv6hdr *ih;
+ u8 nexthdr;
+ int offset;
+
+ ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_ip6h), &_ip6h);
+ if (!ih) {
+ audit_log_format(ab, " truncated=1");
+ return;
+ }
+
+ nexthdr = ih->nexthdr;
+ offset = ipv6_skip_exthdr(skb, skb_network_offset(skb) + sizeof(_ip6h),
+ &nexthdr);
+
+ audit_log_format(ab, " saddr=%pI6c daddr=%pI6c proto=%hhu",
+ &ih->saddr, &ih->daddr, nexthdr);
+
+ if (offset)
+ audit_proto(ab, skb, nexthdr, offset);
+}
+
+static unsigned int
+audit_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_audit_info *info = par->targinfo;
+ struct audit_buffer *ab;
+
+ ab = audit_log_start(NULL, GFP_ATOMIC, AUDIT_NETFILTER_PKT);
+ if (ab == NULL)
+ goto errout;
+
+ audit_log_format(ab, "action=%hhu hook=%u len=%u inif=%s outif=%s",
+ info->type, par->hooknum, skb->len,
+ par->in ? par->in->name : "?",
+ par->out ? par->out->name : "?");
+
+ if (skb->mark)
+ audit_log_format(ab, " mark=%#x", skb->mark);
+
+ if (skb->dev && skb->dev->type == ARPHRD_ETHER) {
+ audit_log_format(ab, " smac=%pM dmac=%pM macproto=0x%04x",
+ eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
+ ntohs(eth_hdr(skb)->h_proto));
+
+ if (par->family == NFPROTO_BRIDGE) {
+ switch (eth_hdr(skb)->h_proto) {
+ case __constant_htons(ETH_P_IP):
+ audit_ip4(ab, skb);
+ break;
+
+ case __constant_htons(ETH_P_IPV6):
+ audit_ip6(ab, skb);
+ break;
+ }
+ }
+ }
+
+ switch (par->family) {
+ case NFPROTO_IPV4:
+ audit_ip4(ab, skb);
+ break;
+
+ case NFPROTO_IPV6:
+ audit_ip6(ab, skb);
+ break;
+ }
+
+ audit_log_end(ab);
+
+errout:
+ return XT_CONTINUE;
+}
+
+static int audit_tg_check(const struct xt_tgchk_param *par)
+{
+ const struct xt_audit_info *info = par->targinfo;
+
+ if (info->type > XT_AUDIT_TYPE_MAX) {
+ pr_info("Audit type out of range (valid range: 0..%hhu)\n",
+ XT_AUDIT_TYPE_MAX);
+ return -ERANGE;
+ }
+
+ return 0;
+}
+
+static struct xt_target audit_tg_reg __read_mostly = {
+ .name = "AUDIT",
+ .family = NFPROTO_UNSPEC,
+ .target = audit_tg,
+ .targetsize = sizeof(struct xt_audit_info),
+ .checkentry = audit_tg_check,
+ .me = THIS_MODULE,
+};
+
+static int __init audit_tg_init(void)
+{
+ return xt_register_target(&audit_tg_reg);
+}
+
+static void __exit audit_tg_exit(void)
+{
+ xt_unregister_target(&audit_tg_reg);
+}
+
+module_init(audit_tg_init);
+module_exit(audit_tg_exit);
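For context, a rule using this target would look something like "iptables -A INPUT -p tcp --dport 23 -j AUDIT --type drop", where --type is assumed to be the userspace spelling of xt_audit_info.type; matching packets then appear in the audit log as NETFILTER_PKT records. Note that audit_tg() always returns XT_CONTINUE, so the target only annotates traffic and never terminates rule traversal itself.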
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
index c2c0e4a..af9c4da 100644
--- a/net/netfilter/xt_CLASSIFY.c
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -19,12 +19,14 @@
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_CLASSIFY.h>
+#include <linux/netfilter_arp.h>
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Xtables: Qdisc classification");
MODULE_ALIAS("ipt_CLASSIFY");
MODULE_ALIAS("ip6t_CLASSIFY");
+MODULE_ALIAS("arpt_CLASSIFY");
static unsigned int
classify_tg(struct sk_buff *skb, const struct xt_action_param *par)
@@ -35,26 +37,36 @@
return XT_CONTINUE;
}
-static struct xt_target classify_tg_reg __read_mostly = {
- .name = "CLASSIFY",
- .revision = 0,
- .family = NFPROTO_UNSPEC,
- .table = "mangle",
- .hooks = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) |
- (1 << NF_INET_POST_ROUTING),
- .target = classify_tg,
- .targetsize = sizeof(struct xt_classify_target_info),
- .me = THIS_MODULE,
+static struct xt_target classify_tg_reg[] __read_mostly = {
+ {
+ .name = "CLASSIFY",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .hooks = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_POST_ROUTING),
+ .target = classify_tg,
+ .targetsize = sizeof(struct xt_classify_target_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "CLASSIFY",
+ .revision = 0,
+ .family = NFPROTO_ARP,
+ .hooks = (1 << NF_ARP_OUT) | (1 << NF_ARP_FORWARD),
+ .target = classify_tg,
+ .targetsize = sizeof(struct xt_classify_target_info),
+ .me = THIS_MODULE,
+ },
};
static int __init classify_tg_init(void)
{
- return xt_register_target(&classify_tg_reg);
+ return xt_register_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg));
}
static void __exit classify_tg_exit(void)
{
- xt_unregister_target(&classify_tg_reg);
+ xt_unregister_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg));
}
module_init(classify_tg_init);
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index be1f22e..3bdd443 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -313,3 +313,5 @@
MODULE_AUTHOR("Luciano Coelho <luciano.coelho@nokia.com>");
MODULE_DESCRIPTION("Xtables: idle time monitor");
MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("ipt_IDLETIMER");
+MODULE_ALIAS("ip6t_IDLETIMER");
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
index a414050..993de2b 100644
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -31,6 +31,8 @@
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Adam Nielsen <a.nielsen@shikadi.net>");
MODULE_DESCRIPTION("Xtables: trigger LED devices on packet match");
+MODULE_ALIAS("ipt_LED");
+MODULE_ALIAS("ip6t_LED");
static LIST_HEAD(xt_led_triggers);
static DEFINE_MUTEX(xt_led_mutex);
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 039cce1..d4f4b5d 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -72,18 +72,31 @@
if (info->queues_total > 1) {
if (par->family == NFPROTO_IPV4)
- queue = hash_v4(skb) % info->queues_total + queue;
+ queue = (((u64) hash_v4(skb) * info->queues_total) >>
+ 32) + queue;
#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
else if (par->family == NFPROTO_IPV6)
- queue = hash_v6(skb) % info->queues_total + queue;
+ queue = (((u64) hash_v6(skb) * info->queues_total) >>
+ 32) + queue;
#endif
}
return NF_QUEUE_NR(queue);
}
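The hash-spreading change swaps the modulo for a multiply-and-shift: treating the 32-bit hash as a fraction of 2^32, (hash * n) >> 32 lands uniformly in [0, n) with no division on the fast path. A standalone check:

#include <stdio.h>
#include <stdint.h>

static uint32_t pick_queue(uint32_t hash, uint32_t n)
{
        /* top 32 bits of the 64-bit product: floor(hash / 2^32 * n) */
        return (uint32_t)(((uint64_t)hash * n) >> 32);
}

int main(void)
{
        printf("%u\n", pick_queue(0x00000000u, 8));     /* 0 */
        printf("%u\n", pick_queue(0x80000000u, 8));     /* 4 */
        printf("%u\n", pick_queue(0xffffffffu, 8));     /* 7 */
        return 0;
}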
-static int nfqueue_tg_v1_check(const struct xt_tgchk_param *par)
+static unsigned int
+nfqueue_tg_v2(struct sk_buff *skb, const struct xt_action_param *par)
{
- const struct xt_NFQ_info_v1 *info = par->targinfo;
+ const struct xt_NFQ_info_v2 *info = par->targinfo;
+ unsigned int ret = nfqueue_tg_v1(skb, par);
+
+ if (info->bypass)
+ ret |= NF_VERDICT_FLAG_QUEUE_BYPASS;
+ return ret;
+}
+
+static int nfqueue_tg_check(const struct xt_tgchk_param *par)
+{
+ const struct xt_NFQ_info_v2 *info = par->targinfo;
u32 maxid;
if (unlikely(!rnd_inited)) {
@@ -100,6 +113,8 @@
info->queues_total, maxid);
return -ERANGE;
}
+ if (par->target->revision == 2 && info->bypass > 1)
+ return -EINVAL;
return 0;
}
@@ -115,11 +130,20 @@
.name = "NFQUEUE",
.revision = 1,
.family = NFPROTO_UNSPEC,
- .checkentry = nfqueue_tg_v1_check,
+ .checkentry = nfqueue_tg_check,
.target = nfqueue_tg_v1,
.targetsize = sizeof(struct xt_NFQ_info_v1),
.me = THIS_MODULE,
},
+ {
+ .name = "NFQUEUE",
+ .revision = 2,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = nfqueue_tg_check,
+ .target = nfqueue_tg_v2,
+ .targetsize = sizeof(struct xt_NFQ_info_v2),
+ .me = THIS_MODULE,
+ },
};
static int __init nfqueue_tg_init(void)
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 5c5b6b9..7fd3fd5 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -193,10 +193,12 @@
if (par->family == NFPROTO_IPV6) {
const struct ipv6hdr *iph = ipv6_hdr(skb);
- memcpy(&addr.ip6, &iph->saddr, sizeof(iph->saddr));
+ memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ?
+ &iph->daddr : &iph->saddr, sizeof(addr.ip6));
} else {
const struct iphdr *iph = ip_hdr(skb);
- addr.ip = iph->saddr;
+ addr.ip = (info->flags & XT_CONNLIMIT_DADDR) ?
+ iph->daddr : iph->saddr;
}
spin_lock_bh(&info->data->lock);
@@ -204,13 +206,12 @@
&info->mask, par->family);
spin_unlock_bh(&info->data->lock);
- if (connections < 0) {
+ if (connections < 0)
/* kmalloc failed, drop it entirely */
- par->hotdrop = true;
- return false;
- }
+ goto hotdrop;
- return (connections > info->limit) ^ info->inverse;
+ return (connections > info->limit) ^
+ !!(info->flags & XT_CONNLIMIT_INVERT);
hotdrop:
par->hotdrop = true;
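Revision 1 folds the old inverse field into a flags word, XT_CONNLIMIT_INVERT alongside the new XT_CONNLIMIT_DADDR for counting by destination address; the verdict is the same xor as before. A standalone truth table, with the flag values assumed to be the usual bit-0/bit-1 layout:

#include <stdio.h>

#define XT_CONNLIMIT_INVERT     1       /* assumed bit values */
#define XT_CONNLIMIT_DADDR      2

static int verdict(int connections, int limit, unsigned int flags)
{
        return (connections > limit) ^ !!(flags & XT_CONNLIMIT_INVERT);
}

int main(void)
{
        printf("%d\n", verdict(11, 10, 0));                   /* 1: over limit  */
        printf("%d\n", verdict(11, 10, XT_CONNLIMIT_INVERT)); /* 0              */
        printf("%d\n", verdict(5, 10, XT_CONNLIMIT_INVERT));  /* 1: under limit */
        return 0;
}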
@@ -268,25 +269,38 @@
kfree(info->data);
}
-static struct xt_match connlimit_mt_reg __read_mostly = {
- .name = "connlimit",
- .revision = 0,
- .family = NFPROTO_UNSPEC,
- .checkentry = connlimit_mt_check,
- .match = connlimit_mt,
- .matchsize = sizeof(struct xt_connlimit_info),
- .destroy = connlimit_mt_destroy,
- .me = THIS_MODULE,
+static struct xt_match connlimit_mt_reg[] __read_mostly = {
+ {
+ .name = "connlimit",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = connlimit_mt_check,
+ .match = connlimit_mt,
+ .matchsize = sizeof(struct xt_connlimit_info),
+ .destroy = connlimit_mt_destroy,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "connlimit",
+ .revision = 1,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = connlimit_mt_check,
+ .match = connlimit_mt,
+ .matchsize = sizeof(struct xt_connlimit_info),
+ .destroy = connlimit_mt_destroy,
+ .me = THIS_MODULE,
+ },
};
static int __init connlimit_mt_init(void)
{
- return xt_register_match(&connlimit_mt_reg);
+ return xt_register_matches(connlimit_mt_reg,
+ ARRAY_SIZE(connlimit_mt_reg));
}
static void __exit connlimit_mt_exit(void)
{
- xt_unregister_match(&connlimit_mt_reg);
+ xt_unregister_matches(connlimit_mt_reg, ARRAY_SIZE(connlimit_mt_reg));
}
module_init(connlimit_mt_init);
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index e536710..4ef1b63 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -112,6 +112,54 @@
return true;
}
+static inline bool
+port_match(u16 min, u16 max, u16 port, bool invert)
+{
+ return (port >= min && port <= max) ^ invert;
+}
+
+static inline bool
+ct_proto_port_check_v3(const struct xt_conntrack_mtinfo3 *info,
+ const struct nf_conn *ct)
+{
+ const struct nf_conntrack_tuple *tuple;
+
+ tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+ if ((info->match_flags & XT_CONNTRACK_PROTO) &&
+ (nf_ct_protonum(ct) == info->l4proto) ^
+ !(info->invert_flags & XT_CONNTRACK_PROTO))
+ return false;
+
+ /* Shortcut to match all recognized protocols by using ->src.all. */
+ if ((info->match_flags & XT_CONNTRACK_ORIGSRC_PORT) &&
+ !port_match(info->origsrc_port, info->origsrc_port_high,
+ ntohs(tuple->src.u.all),
+ info->invert_flags & XT_CONNTRACK_ORIGSRC_PORT))
+ return false;
+
+ if ((info->match_flags & XT_CONNTRACK_ORIGDST_PORT) &&
+ !port_match(info->origdst_port, info->origdst_port_high,
+ ntohs(tuple->dst.u.all),
+ info->invert_flags & XT_CONNTRACK_ORIGDST_PORT))
+ return false;
+
+ tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+
+ if ((info->match_flags & XT_CONNTRACK_REPLSRC_PORT) &&
+ !port_match(info->replsrc_port, info->replsrc_port_high,
+ ntohs(tuple->src.u.all),
+ info->invert_flags & XT_CONNTRACK_REPLSRC_PORT))
+ return false;
+
+ if ((info->match_flags & XT_CONNTRACK_REPLDST_PORT) &&
+ !port_match(info->repldst_port, info->repldst_port_high,
+ ntohs(tuple->dst.u.all),
+ info->invert_flags & XT_CONNTRACK_REPLDST_PORT))
+ return false;
+
+ return true;
+}
+
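port_match() gives revision 3 closed port ranges with a per-key invert bit, the xor idiom used throughout this file. Restated standalone with a few sanity checks:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool port_match(uint16_t min, uint16_t max, uint16_t port, bool invert)
{
        return (port >= min && port <= max) ^ invert;
}

int main(void)
{
        assert(port_match(1024, 65535, 8080, false));   /* in range          */
        assert(!port_match(1024, 65535, 80, false));    /* below range       */
        assert(port_match(1024, 65535, 80, true));      /* inverted -> match */
        return 0;
}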
static bool
conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
u16 state_mask, u16 status_mask)
@@ -170,8 +218,13 @@
!(info->invert_flags & XT_CONNTRACK_REPLDST))
return false;
- if (!ct_proto_port_check(info, ct))
- return false;
+ if (par->match->revision != 3) {
+ if (!ct_proto_port_check(info, ct))
+ return false;
+ } else {
+ if (!ct_proto_port_check_v3(par->matchinfo, ct))
+ return false;
+ }
if ((info->match_flags & XT_CONNTRACK_STATUS) &&
(!!(status_mask & ct->status) ^
@@ -207,6 +260,14 @@
return conntrack_mt(skb, par, info->state_mask, info->status_mask);
}
+static bool
+conntrack_mt_v3(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_conntrack_mtinfo3 *info = par->matchinfo;
+
+ return conntrack_mt(skb, par, info->state_mask, info->status_mask);
+}
+
static int conntrack_mt_check(const struct xt_mtchk_param *par)
{
int ret;
@@ -244,6 +305,16 @@
.destroy = conntrack_mt_destroy,
.me = THIS_MODULE,
},
+ {
+ .name = "conntrack",
+ .revision = 3,
+ .family = NFPROTO_UNSPEC,
+ .matchsize = sizeof(struct xt_conntrack_mtinfo3),
+ .match = conntrack_mt_v3,
+ .checkentry = conntrack_mt_check,
+ .destroy = conntrack_mt_destroy,
+ .me = THIS_MODULE,
+ },
};
static int __init conntrack_mt_init(void)
diff --git a/net/netfilter/xt_cpu.c b/net/netfilter/xt_cpu.c
index b39db8a..c7a2e54 100644
--- a/net/netfilter/xt_cpu.c
+++ b/net/netfilter/xt_cpu.c
@@ -22,6 +22,8 @@
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Eric Dumazet <eric.dumazet@gmail.com>");
MODULE_DESCRIPTION("Xtables: CPU match");
+MODULE_ALIAS("ipt_cpu");
+MODULE_ALIAS("ip6t_cpu");
static int cpu_mt_check(const struct xt_mtchk_param *par)
{
diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c
index 9127a3d..bb10b07 100644
--- a/net/netfilter/xt_ipvs.c
+++ b/net/netfilter/xt_ipvs.c
@@ -85,7 +85,7 @@
/*
* Check if the packet belongs to an existing entry
*/
- cp = pp->conn_out_get(family, skb, pp, &iph, iph.len, 1 /* inverse */);
+ cp = pp->conn_out_get(family, skb, &iph, iph.len, 1 /* inverse */);
if (unlikely(cp == NULL)) {
match = false;
goto out;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 91cb1d7..c60649e 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -164,7 +164,6 @@
static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
int closing, int tx_ring);
-#define PGV_FROM_VMALLOC 1
struct pgv {
char *buffer;
};
@@ -523,11 +522,11 @@
{
struct sk_filter *filter;
- rcu_read_lock_bh();
- filter = rcu_dereference_bh(sk->sk_filter);
+ rcu_read_lock();
+ filter = rcu_dereference(sk->sk_filter);
if (filter != NULL)
res = sk_run_filter(skb, filter->insns);
- rcu_read_unlock_bh();
+ rcu_read_unlock();
return res;
}
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 9542449..da8adac 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -50,7 +50,6 @@
#define RDS_FRAG_SIZE ((unsigned int)(1 << RDS_FRAG_SHIFT))
#define RDS_CONG_MAP_BYTES (65536 / 8)
-#define RDS_CONG_MAP_LONGS (RDS_CONG_MAP_BYTES / sizeof(unsigned long))
#define RDS_CONG_MAP_PAGES (PAGE_ALIGN(RDS_CONG_MAP_BYTES) / PAGE_SIZE)
#define RDS_CONG_MAP_PAGE_BITS (PAGE_SIZE * 8)
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index f04d4a4..e318f45 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -205,6 +205,18 @@
If unsure, say N.
+config NET_SCH_MQPRIO
+ tristate "Multi-queue priority scheduler (MQPRIO)"
+ help
+ Say Y here if you want to use the Multi-queue Priority scheduler.
+ This scheduler allows QoS to be offloaded on NICs that have support
+ for offloading QoS schedulers.
+
+ To compile this driver as a module, choose M here: the module will
+ be called sch_mqprio.
+
+ If unsure, say N.
+
config NET_SCH_INGRESS
tristate "Ingress Qdisc"
depends on NET_CLS_ACT
@@ -243,7 +255,7 @@
config NET_CLS_ROUTE4
tristate "Routing decision (ROUTE)"
- select NET_CLS_ROUTE
+ select IP_ROUTE_CLASSID
select NET_CLS
---help---
If you say Y here, you will be able to classify packets
@@ -252,9 +264,6 @@
To compile this code as a module, choose M here: the
module will be called cls_route.
-config NET_CLS_ROUTE
- bool
-
config NET_CLS_FW
tristate "Netfilter mark (FW)"
select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 960f5db..26ce681 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -32,6 +32,7 @@
obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o
obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o
obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o
+obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o
obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
obj-$(CONFIG_NET_CLS_FW) += cls_fw.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 23b25f8..15873e1 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -78,7 +78,7 @@
struct tc_action *a, struct tcf_hashinfo *hinfo)
{
struct tcf_common *p;
- int err = 0, index = -1,i = 0, s_i = 0, n_i = 0;
+ int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
struct nlattr *nest;
read_lock_bh(hinfo->lock);
@@ -126,7 +126,7 @@
{
struct tcf_common *p, *s_p;
struct nlattr *nest;
- int i= 0, n_i = 0;
+ int i = 0, n_i = 0;
nest = nla_nest_start(skb, a->order);
if (nest == NULL)
@@ -138,7 +138,7 @@
while (p != NULL) {
s_p = p->tcfc_next;
if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo))
- module_put(a->ops->owner);
+ module_put(a->ops->owner);
n_i++;
p = s_p;
}
@@ -447,7 +447,8 @@
nest = nla_nest_start(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;
- if ((err = tcf_action_dump_old(skb, a, bind, ref)) > 0) {
+ err = tcf_action_dump_old(skb, a, bind, ref);
+ if (err > 0) {
nla_nest_end(skb, nest);
return err;
}
@@ -491,7 +492,7 @@
struct tc_action *a;
struct tc_action_ops *a_o;
char act_name[IFNAMSIZ];
- struct nlattr *tb[TCA_ACT_MAX+1];
+ struct nlattr *tb[TCA_ACT_MAX + 1];
struct nlattr *kind;
int err;
@@ -549,9 +550,9 @@
goto err_free;
/* module count goes up only when brand new policy is created
- if it exists and is only bound to in a_o->init() then
- ACT_P_CREATED is not returned (a zero is).
- */
+ * if it exists and is only bound to in a_o->init() then
+ * ACT_P_CREATED is not returned (a zero is).
+ */
if (err != ACT_P_CREATED)
module_put(a_o->owner);
a->ops = a_o;
@@ -569,7 +570,7 @@
struct tc_action *tcf_action_init(struct nlattr *nla, struct nlattr *est,
char *name, int ovr, int bind)
{
- struct nlattr *tb[TCA_ACT_MAX_PRIO+1];
+ struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
struct tc_action *head = NULL, *act, *act_prev = NULL;
int err;
int i;
@@ -697,7 +698,7 @@
static struct tc_action *
tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
{
- struct nlattr *tb[TCA_ACT_MAX+1];
+ struct nlattr *tb[TCA_ACT_MAX + 1];
struct tc_action *a;
int index;
int err;
@@ -770,7 +771,7 @@
struct tcamsg *t;
struct netlink_callback dcb;
struct nlattr *nest;
- struct nlattr *tb[TCA_ACT_MAX+1];
+ struct nlattr *tb[TCA_ACT_MAX + 1];
struct nlattr *kind;
struct tc_action *a = create_a(0);
int err = -ENOMEM;
@@ -821,7 +822,8 @@
nlh->nlmsg_flags |= NLM_F_ROOT;
module_put(a->ops->owner);
kfree(a);
- err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+ err = rtnetlink_send(skb, net, pid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
if (err > 0)
return 0;
@@ -842,14 +844,14 @@
u32 pid, int event)
{
int i, ret;
- struct nlattr *tb[TCA_ACT_MAX_PRIO+1];
+ struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
struct tc_action *head = NULL, *act, *act_prev = NULL;
ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL);
if (ret < 0)
return ret;
- if (event == RTM_DELACTION && n->nlmsg_flags&NLM_F_ROOT) {
+ if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) {
if (tb[1] != NULL)
return tca_action_flush(net, tb[1], n, pid);
else
@@ -892,7 +894,7 @@
/* now do the delete */
tcf_action_destroy(head, 0);
ret = rtnetlink_send(skb, net, pid, RTNLGRP_TC,
- n->nlmsg_flags&NLM_F_ECHO);
+ n->nlmsg_flags & NLM_F_ECHO);
if (ret > 0)
return 0;
return ret;
@@ -936,7 +938,7 @@
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
NETLINK_CB(skb).dst_group = RTNLGRP_TC;
- err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags&NLM_F_ECHO);
+ err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags & NLM_F_ECHO);
if (err > 0)
err = 0;
return err;
@@ -967,7 +969,7 @@
/* dump then free all the actions after update; inserted policy
* stays intact
- * */
+ */
ret = tcf_add_notify(net, act, pid, seq, RTM_NEWACTION, n->nlmsg_flags);
for (a = act; a; a = act) {
act = a->next;
@@ -993,8 +995,7 @@
return -EINVAL;
}
- /* n->nlmsg_flags&NLM_F_CREATE
- * */
+ /* n->nlmsg_flags & NLM_F_CREATE */
switch (n->nlmsg_type) {
case RTM_NEWACTION:
/* we are going to assume all other flags
@@ -1003,7 +1004,7 @@
* but since we want avoid ambiguity (eg when flags
* is zero) then just set this
*/
- if (n->nlmsg_flags&NLM_F_REPLACE)
+ if (n->nlmsg_flags & NLM_F_REPLACE)
ovr = 1;
replay:
ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, pid, ovr);
@@ -1028,7 +1029,7 @@
static struct nlattr *
find_dump_kind(const struct nlmsghdr *n)
{
- struct nlattr *tb1, *tb2[TCA_ACT_MAX+1];
+ struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1];
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
struct nlattr *nla[TCAA_MAX + 1];
struct nlattr *kind;
@@ -1071,9 +1072,8 @@
}
a_o = tc_lookup_action(kind);
- if (a_o == NULL) {
+ if (a_o == NULL)
return 0;
- }
memset(&a, 0, sizeof(struct tc_action));
a.ops = a_o;
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 83ddfc0..6cdf9ab 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -63,7 +63,7 @@
if (nla == NULL)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_CSUM_MAX, nla,csum_policy);
+ err = nla_parse_nested(tb, TCA_CSUM_MAX, nla, csum_policy);
if (err < 0)
return err;
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index c2ed90a..2b4ab4b 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -50,7 +50,7 @@
}
typedef int (*g_rand)(struct tcf_gact *gact);
-static g_rand gact_rand[MAX_RAND]= { NULL, gact_net_rand, gact_determ };
+static g_rand gact_rand[MAX_RAND] = { NULL, gact_net_rand, gact_determ };
#endif /* CONFIG_GACT_PROB */
static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
@@ -89,7 +89,7 @@
pc = tcf_hash_create(parm->index, est, a, sizeof(*gact),
bind, &gact_idx_gen, &gact_hash_info);
if (IS_ERR(pc))
- return PTR_ERR(pc);
+ return PTR_ERR(pc);
ret = ACT_P_CREATED;
} else {
if (!ovr) {
@@ -205,9 +205,9 @@
static int __init gact_init_module(void)
{
#ifdef CONFIG_GACT_PROB
- printk(KERN_INFO "GACT probability on\n");
+ pr_info("GACT probability on\n");
#else
- printk(KERN_INFO "GACT probability NOT on\n");
+ pr_info("GACT probability NOT on\n");
#endif
return tcf_register_action(&act_gact_ops);
}
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index c2a7c20..9fc211a 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -138,7 +138,7 @@
pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind,
&ipt_idx_gen, &ipt_hash_info);
if (IS_ERR(pc))
- return PTR_ERR(pc);
+ return PTR_ERR(pc);
ret = ACT_P_CREATED;
} else {
if (!ovr) {
@@ -162,7 +162,8 @@
if (unlikely(!t))
goto err2;
- if ((err = ipt_init_target(t, tname, hook)) < 0)
+ err = ipt_init_target(t, tname, hook);
+ if (err < 0)
goto err3;
spin_lock_bh(&ipt->tcf_lock);
@@ -212,8 +213,9 @@
bstats_update(&ipt->tcf_bstats, skb);
/* yes, we have to worry about both in and out dev
- worry later - danger - this API seems to have changed
- from earlier kernels */
+ * worry later - danger - this API seems to have changed
+ * from earlier kernels
+ */
par.in = skb->dev;
par.out = NULL;
par.hooknum = ipt->tcfi_hook;
@@ -253,9 +255,9 @@
struct tc_cnt c;
/* for simple targets kernel size == user size
- ** user name = target name
- ** for foolproof you need to not assume this
- */
+ * user name = target name
+ * for foolproof you need to not assume this
+ */
t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC);
if (unlikely(!t))
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index d765067..961386e 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -41,13 +41,13 @@
.lock = &mirred_lock,
};
-static inline int tcf_mirred_release(struct tcf_mirred *m, int bind)
+static int tcf_mirred_release(struct tcf_mirred *m, int bind)
{
if (m) {
if (bind)
m->tcf_bindcnt--;
m->tcf_refcnt--;
- if(!m->tcf_bindcnt && m->tcf_refcnt <= 0) {
+ if (!m->tcf_bindcnt && m->tcf_refcnt <= 0) {
list_del(&m->tcfm_list);
if (m->tcfm_dev)
dev_put(m->tcfm_dev);
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 178a4bd..762b027 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -69,7 +69,7 @@
pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
&nat_idx_gen, &nat_hash_info);
if (IS_ERR(pc))
- return PTR_ERR(pc);
+ return PTR_ERR(pc);
p = to_tcf_nat(pc);
ret = ACT_P_CREATED;
} else {
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 445bef7..50c7c06 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -70,7 +70,7 @@
pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
&pedit_idx_gen, &pedit_hash_info);
if (IS_ERR(pc))
- return PTR_ERR(pc);
+ return PTR_ERR(pc);
p = to_pedit(pc);
keys = kmalloc(ksize, GFP_KERNEL);
if (keys == NULL) {
@@ -127,11 +127,9 @@
int i, munged = 0;
unsigned int off;
- if (skb_cloned(skb)) {
- if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
- return p->tcf_action;
- }
- }
+ if (skb_cloned(skb) &&
+ pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+ return p->tcf_action;
off = skb_network_offset(skb);
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index e2f08b1..8a16307 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -22,8 +22,8 @@
#include <net/act_api.h>
#include <net/netlink.h>
-#define L2T(p,L) qdisc_l2t((p)->tcfp_R_tab, L)
-#define L2T_P(p,L) qdisc_l2t((p)->tcfp_P_tab, L)
+#define L2T(p, L) qdisc_l2t((p)->tcfp_R_tab, L)
+#define L2T_P(p, L) qdisc_l2t((p)->tcfp_P_tab, L)
#define POL_TAB_MASK 15
static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1];
@@ -37,8 +37,7 @@
};
/* old policer structure from before tc actions */
-struct tc_police_compat
-{
+struct tc_police_compat {
u32 index;
int action;
u32 limit;
@@ -139,7 +138,7 @@
static int tcf_act_police_locate(struct nlattr *nla, struct nlattr *est,
struct tc_action *a, int ovr, int bind)
{
- unsigned h;
+ unsigned int h;
int ret = 0, err;
struct nlattr *tb[TCA_POLICE_MAX + 1];
struct tc_police *parm;
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 7287cff..a34a22d 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -47,7 +47,7 @@
/* print policy string followed by _ then packet count
* Example if this was the 3rd packet and the string was "hello"
* then it would look like "hello_3" (without quotes)
- **/
+ */
pr_info("simple: %s_%d\n",
(char *)d->tcfd_defdata, d->tcf_bstats.packets);
spin_unlock(&d->tcf_lock);
@@ -125,7 +125,7 @@
pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
&simp_idx_gen, &simp_hash_info);
if (IS_ERR(pc))
- return PTR_ERR(pc);
+ return PTR_ERR(pc);
d = to_defact(pc);
ret = alloc_defdata(d, defdata);
@@ -149,7 +149,7 @@
return ret;
}
-static inline int tcf_simp_cleanup(struct tc_action *a, int bind)
+static int tcf_simp_cleanup(struct tc_action *a, int bind)
{
struct tcf_defact *d = a->priv;
@@ -158,8 +158,8 @@
return 0;
}
-static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
- int bind, int ref)
+static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
+ int bind, int ref)
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_defact *d = a->priv;
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 836f5fe..5f6f0c7 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -113,7 +113,7 @@
pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
&skbedit_idx_gen, &skbedit_hash_info);
if (IS_ERR(pc))
- return PTR_ERR(pc);
+ return PTR_ERR(pc);
d = to_skbedit(pc);
ret = ACT_P_CREATED;
@@ -144,7 +144,7 @@
return ret;
}
-static inline int tcf_skbedit_cleanup(struct tc_action *a, int bind)
+static int tcf_skbedit_cleanup(struct tc_action *a, int bind)
{
struct tcf_skbedit *d = a->priv;
@@ -153,8 +153,8 @@
return 0;
}
-static inline int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
- int bind, int ref)
+static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
+ int bind, int ref)
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_skbedit *d = a->priv;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 5fd0c28..bb2c523 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -85,7 +85,7 @@
int rc = -ENOENT;
write_lock(&cls_mod_lock);
- for (tp = &tcf_proto_base; (t=*tp) != NULL; tp = &t->next)
+ for (tp = &tcf_proto_base; (t = *tp) != NULL; tp = &t->next)
if (t == ops)
break;
@@ -111,7 +111,7 @@
u32 first = TC_H_MAKE(0xC0000000U, 0U);
if (tp)
- first = tp->prio-1;
+ first = tp->prio - 1;
return first;
}
@@ -149,7 +149,8 @@
if (prio == 0) {
/* If no priority is given, user wants we allocated it. */
- if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE))
+ if (n->nlmsg_type != RTM_NEWTFILTER ||
+ !(n->nlmsg_flags & NLM_F_CREATE))
return -ENOENT;
prio = TC_H_MAKE(0x80000000U, 0U);
}
@@ -176,7 +177,8 @@
}
/* Is it classful? */
- if ((cops = q->ops->cl_ops) == NULL)
+ cops = q->ops->cl_ops;
+ if (!cops)
return -EINVAL;
if (cops->tcf_chain == NULL)
@@ -196,10 +198,11 @@
goto errout;
/* Check the chain for existence of proto-tcf with this priority */
- for (back = chain; (tp=*back) != NULL; back = &tp->next) {
+ for (back = chain; (tp = *back) != NULL; back = &tp->next) {
if (tp->prio >= prio) {
if (tp->prio == prio) {
- if (!nprio || (tp->protocol != protocol && protocol))
+ if (!nprio ||
+ (tp->protocol != protocol && protocol))
goto errout;
} else
tp = NULL;
@@ -216,7 +219,8 @@
goto errout;
err = -ENOENT;
- if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE))
+ if (n->nlmsg_type != RTM_NEWTFILTER ||
+ !(n->nlmsg_flags & NLM_F_CREATE))
goto errout;
@@ -420,7 +424,8 @@
if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
return skb->len;
- if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
+ dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+ if (!dev)
return skb->len;
if (!tcm->tcm_parent)
@@ -429,7 +434,8 @@
q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
if (!q)
goto out;
- if ((cops = q->ops->cl_ops) == NULL)
+ cops = q->ops->cl_ops;
+ if (!cops)
goto errout;
if (cops->tcf_chain == NULL)
goto errout;
@@ -444,8 +450,9 @@
s_t = cb->args[0];
- for (tp=*chain, t=0; tp; tp = tp->next, t++) {
- if (t < s_t) continue;
+ for (tp = *chain, t = 0; tp; tp = tp->next, t++) {
+ if (t < s_t)
+ continue;
if (TC_H_MAJ(tcm->tcm_info) &&
TC_H_MAJ(tcm->tcm_info) != tp->prio)
continue;
@@ -468,10 +475,10 @@
arg.skb = skb;
arg.cb = cb;
arg.w.stop = 0;
- arg.w.skip = cb->args[1]-1;
+ arg.w.skip = cb->args[1] - 1;
arg.w.count = 0;
tp->ops->walk(tp, &arg.w);
- cb->args[1] = arg.w.count+1;
+ cb->args[1] = arg.w.count + 1;
if (arg.w.stop)
break;
}
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index f23d915..8be8872 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -21,14 +21,12 @@
#include <net/act_api.h>
#include <net/pkt_cls.h>
-struct basic_head
-{
+struct basic_head {
u32 hgenerator;
struct list_head flist;
};
-struct basic_filter
-{
+struct basic_filter {
u32 handle;
struct tcf_exts exts;
struct tcf_ematch_tree ematches;
@@ -92,8 +90,7 @@
return 0;
}
-static inline void basic_delete_filter(struct tcf_proto *tp,
- struct basic_filter *f)
+static void basic_delete_filter(struct tcf_proto *tp, struct basic_filter *f)
{
tcf_unbind_filter(tp, &f->res);
tcf_exts_destroy(tp, &f->exts);
@@ -135,9 +132,9 @@
[TCA_BASIC_EMATCHES] = { .type = NLA_NESTED },
};
-static inline int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f,
- unsigned long base, struct nlattr **tb,
- struct nlattr *est)
+static int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f,
+ unsigned long base, struct nlattr **tb,
+ struct nlattr *est)
{
int err = -EINVAL;
struct tcf_exts e;
@@ -203,7 +200,7 @@
} while (--i > 0 && basic_get(tp, head->hgenerator));
if (i <= 0) {
- printk(KERN_ERR "Insufficient number of handles\n");
+ pr_err("Insufficient number of handles\n");
goto errout;
}
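
The handle generator touched above loops a bounded number of times, stepping hgenerator and probing basic_get() until an unused handle turns up, and gives up with 0 (an error) when the attempts are exhausted. A small sketch of the same pattern, simplified to step by one, with in_use() standing in for basic_get():

#include <stdbool.h>
#include <stdio.h>

static bool in_use(unsigned int h) { return h == 1; }	/* stand-in for basic_get() */

static unsigned int new_handle(unsigned int *gen)
{
	int i = 0xFFFF;			/* bound the search */

	do {
		if (++*gen == 0)	/* handle 0 means "allocate one" */
			*gen = 1;
	} while (--i > 0 && in_use(*gen));
	return i > 0 ? *gen : 0;	/* 0 on exhaustion */
}

int main(void)
{
	unsigned int gen = 0;

	printf("%u %u\n", new_handle(&gen), new_handle(&gen));	/* 2 3 */
	return 0;
}
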
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index d49c40f..32a3351 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -56,7 +56,8 @@
{
struct cgroup_cls_state *cs;
- if (!(cs = kzalloc(sizeof(*cs), GFP_KERNEL)))
+ cs = kzalloc(sizeof(*cs), GFP_KERNEL);
+ if (!cs)
return ERR_PTR(-ENOMEM);
if (cgrp->parent)
@@ -94,8 +95,7 @@
return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files));
}
-struct cls_cgroup_head
-{
+struct cls_cgroup_head {
u32 handle;
struct tcf_exts exts;
struct tcf_ematch_tree ematches;
@@ -166,7 +166,7 @@
u32 handle, struct nlattr **tca,
unsigned long *arg)
{
- struct nlattr *tb[TCA_CGROUP_MAX+1];
+ struct nlattr *tb[TCA_CGROUP_MAX + 1];
struct cls_cgroup_head *head = tp->root;
struct tcf_ematch_tree t;
struct tcf_exts e;
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 5b271a1..8ec0139 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -121,7 +121,7 @@
if (!pskb_network_may_pull(skb, sizeof(*iph)))
break;
iph = ip_hdr(skb);
- if (iph->frag_off & htons(IP_MF|IP_OFFSET))
+ if (iph->frag_off & htons(IP_MF | IP_OFFSET))
break;
poff = proto_ports_offset(iph->protocol);
if (poff >= 0 &&
@@ -163,7 +163,7 @@
if (!pskb_network_may_pull(skb, sizeof(*iph)))
break;
iph = ip_hdr(skb);
- if (iph->frag_off & htons(IP_MF|IP_OFFSET))
+ if (iph->frag_off & htons(IP_MF | IP_OFFSET))
break;
poff = proto_ports_offset(iph->protocol);
if (poff >= 0 &&
@@ -276,7 +276,7 @@
static u32 flow_get_rtclassid(const struct sk_buff *skb)
{
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
if (skb_dst(skb))
return skb_dst(skb)->tclassid;
#endif
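
For context on the IP_MF | IP_OFFSET test in the two flow hunks above: those bits are nonzero for every fragment of a datagram, including the first, so the flow key code never reads ports from fragmented traffic and all fragments of one datagram hash alike. A standalone illustration (constants redefined locally so this compiles outside the kernel tree):

#include <arpa/inet.h>
#include <stdio.h>

#define IP_MF     0x2000	/* "more fragments" flag */
#define IP_OFFSET 0x1FFF	/* fragment offset mask */

/* Nonzero frag bits mean the packet is (part of) a fragment train;
 * only an unfragmented datagram is guaranteed to carry the transport
 * ports, and skipping *all* fragments keeps the hash stable. */
static int may_read_ports(unsigned short frag_off_net)
{
	return !(frag_off_net & htons(IP_MF | IP_OFFSET));
}

int main(void)
{
	printf("%d\n", may_read_ports(htons(0)));	/* 1: whole datagram */
	printf("%d\n", may_read_ports(htons(IP_MF)));	/* 0: first fragment */
	printf("%d\n", may_read_ports(htons(185)));	/* 0: later fragment */
	return 0;
}
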
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 93b0a7b..26e7bc4 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -31,14 +31,12 @@
#define HTSIZE (PAGE_SIZE/sizeof(struct fw_filter *))
-struct fw_head
-{
+struct fw_head {
struct fw_filter *ht[HTSIZE];
u32 mask;
};
-struct fw_filter
-{
+struct fw_filter {
struct fw_filter *next;
u32 id;
struct tcf_result res;
@@ -53,7 +51,7 @@
.police = TCA_FW_POLICE
};
-static __inline__ int fw_hash(u32 handle)
+static inline int fw_hash(u32 handle)
{
if (HTSIZE == 4096)
return ((handle >> 24) & 0xFFF) ^
@@ -82,14 +80,14 @@
static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
struct tcf_result *res)
{
- struct fw_head *head = (struct fw_head*)tp->root;
+ struct fw_head *head = (struct fw_head *)tp->root;
struct fw_filter *f;
int r;
u32 id = skb->mark;
if (head != NULL) {
id &= head->mask;
- for (f=head->ht[fw_hash(id)]; f; f=f->next) {
+ for (f = head->ht[fw_hash(id)]; f; f = f->next) {
if (f->id == id) {
*res = f->res;
#ifdef CONFIG_NET_CLS_IND
@@ -105,7 +103,8 @@
}
} else {
/* old method */
- if (id && (TC_H_MAJ(id) == 0 || !(TC_H_MAJ(id^tp->q->handle)))) {
+ if (id && (TC_H_MAJ(id) == 0 ||
+ !(TC_H_MAJ(id ^ tp->q->handle)))) {
res->classid = id;
res->class = 0;
return 0;
@@ -117,13 +116,13 @@
static unsigned long fw_get(struct tcf_proto *tp, u32 handle)
{
- struct fw_head *head = (struct fw_head*)tp->root;
+ struct fw_head *head = (struct fw_head *)tp->root;
struct fw_filter *f;
if (head == NULL)
return 0;
- for (f=head->ht[fw_hash(handle)]; f; f=f->next) {
+ for (f = head->ht[fw_hash(handle)]; f; f = f->next) {
if (f->id == handle)
return (unsigned long)f;
}
@@ -139,8 +138,7 @@
return 0;
}
-static inline void
-fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f)
+static void fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f)
{
tcf_unbind_filter(tp, &f->res);
tcf_exts_destroy(tp, &f->exts);
@@ -156,8 +154,8 @@
if (head == NULL)
return;
- for (h=0; h<HTSIZE; h++) {
- while ((f=head->ht[h]) != NULL) {
+ for (h = 0; h < HTSIZE; h++) {
+ while ((f = head->ht[h]) != NULL) {
head->ht[h] = f->next;
fw_delete_filter(tp, f);
}
@@ -167,14 +165,14 @@
static int fw_delete(struct tcf_proto *tp, unsigned long arg)
{
- struct fw_head *head = (struct fw_head*)tp->root;
- struct fw_filter *f = (struct fw_filter*)arg;
+ struct fw_head *head = (struct fw_head *)tp->root;
+ struct fw_filter *f = (struct fw_filter *)arg;
struct fw_filter **fp;
if (head == NULL || f == NULL)
goto out;
- for (fp=&head->ht[fw_hash(f->id)]; *fp; fp = &(*fp)->next) {
+ for (fp = &head->ht[fw_hash(f->id)]; *fp; fp = &(*fp)->next) {
if (*fp == f) {
tcf_tree_lock(tp);
*fp = f->next;
@@ -240,7 +238,7 @@
struct nlattr **tca,
unsigned long *arg)
{
- struct fw_head *head = (struct fw_head*)tp->root;
+ struct fw_head *head = (struct fw_head *)tp->root;
struct fw_filter *f = (struct fw_filter *) *arg;
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_FW_MAX + 1];
@@ -302,7 +300,7 @@
static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
- struct fw_head *head = (struct fw_head*)tp->root;
+ struct fw_head *head = (struct fw_head *)tp->root;
int h;
if (head == NULL)
@@ -332,7 +330,7 @@
struct sk_buff *skb, struct tcmsg *t)
{
struct fw_head *head = (struct fw_head *)tp->root;
- struct fw_filter *f = (struct fw_filter*)fh;
+ struct fw_filter *f = (struct fw_filter *)fh;
unsigned char *b = skb_tail_pointer(skb);
struct nlattr *nest;
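
As a sketch of what fw_hash() (first cls_fw hunk above) computes when HTSIZE is 4096: the 32-bit fwmark is XOR-folded down to a 12-bit bucket index, mixing the top byte with two 12-bit slices. Standalone:

#include <stdint.h>
#include <stdio.h>

/* 4096 buckets: fold the top byte and two 12-bit slices of the mark
 * into one 12-bit index. */
static unsigned int fw_hash_4k(uint32_t handle)
{
	return ((handle >> 24) & 0xFFF) ^
	       ((handle >> 12) & 0xFFF) ^
	       (handle & 0xFFF);
}

int main(void)
{
	printf("%#x\n", fw_hash_4k(0x12345678));	/* 0x12 ^ 0x345 ^ 0x678 */
	return 0;
}
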
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 694dcd8..d580cdf 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -23,34 +23,30 @@
#include <net/pkt_cls.h>
/*
- 1. For now we assume that route tags < 256.
- It allows to use direct table lookups, instead of hash tables.
- 2. For now we assume that "from TAG" and "fromdev DEV" statements
- are mutually exclusive.
- 3. "to TAG from ANY" has higher priority, than "to ANY from XXX"
+ * 1. For now we assume that route tags < 256.
+ *	It allows us to use direct table lookups instead of hash tables.
+ * 2. For now we assume that "from TAG" and "fromdev DEV" statements
+ * are mutually exclusive.
+ * 3. "to TAG from ANY" has higher priority than "to ANY from XXX"
*/
-struct route4_fastmap
-{
+struct route4_fastmap {
struct route4_filter *filter;
u32 id;
int iif;
};
-struct route4_head
-{
+struct route4_head {
struct route4_fastmap fastmap[16];
- struct route4_bucket *table[256+1];
+ struct route4_bucket *table[256 + 1];
};
-struct route4_bucket
-{
+struct route4_bucket {
/* 16 FROM buckets + 16 IIF buckets + 1 wildcard bucket */
- struct route4_filter *ht[16+16+1];
+ struct route4_filter *ht[16 + 16 + 1];
};
-struct route4_filter
-{
+struct route4_filter {
struct route4_filter *next;
u32 id;
int iif;
@@ -61,20 +57,20 @@
struct route4_bucket *bkt;
};
-#define ROUTE4_FAILURE ((struct route4_filter*)(-1L))
+#define ROUTE4_FAILURE ((struct route4_filter *)(-1L))
static const struct tcf_ext_map route_ext_map = {
.police = TCA_ROUTE4_POLICE,
.action = TCA_ROUTE4_ACT
};
-static __inline__ int route4_fastmap_hash(u32 id, int iif)
+static inline int route4_fastmap_hash(u32 id, int iif)
{
- return id&0xF;
+ return id & 0xF;
}
-static inline
-void route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id)
+static void
+route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id)
{
spinlock_t *root_lock = qdisc_root_sleeping_lock(q);
@@ -83,32 +79,33 @@
spin_unlock_bh(root_lock);
}
-static inline void
+static void
route4_set_fastmap(struct route4_head *head, u32 id, int iif,
struct route4_filter *f)
{
int h = route4_fastmap_hash(id, iif);
+
head->fastmap[h].id = id;
head->fastmap[h].iif = iif;
head->fastmap[h].filter = f;
}
-static __inline__ int route4_hash_to(u32 id)
+static inline int route4_hash_to(u32 id)
{
- return id&0xFF;
+ return id & 0xFF;
}
-static __inline__ int route4_hash_from(u32 id)
+static inline int route4_hash_from(u32 id)
{
- return (id>>16)&0xF;
+ return (id >> 16) & 0xF;
}
-static __inline__ int route4_hash_iif(int iif)
+static inline int route4_hash_iif(int iif)
{
- return 16 + ((iif>>16)&0xF);
+ return 16 + ((iif >> 16) & 0xF);
}
-static __inline__ int route4_hash_wild(void)
+static inline int route4_hash_wild(void)
{
return 32;
}
@@ -131,21 +128,22 @@
static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp,
struct tcf_result *res)
{
- struct route4_head *head = (struct route4_head*)tp->root;
+ struct route4_head *head = (struct route4_head *)tp->root;
struct dst_entry *dst;
struct route4_bucket *b;
struct route4_filter *f;
u32 id, h;
int iif, dont_cache = 0;
- if ((dst = skb_dst(skb)) == NULL)
+ dst = skb_dst(skb);
+ if (!dst)
goto failure;
id = dst->tclassid;
if (head == NULL)
goto old_method;
- iif = ((struct rtable*)dst)->fl.iif;
+ iif = ((struct rtable *)dst)->fl.iif;
h = route4_fastmap_hash(id, iif);
if (id == head->fastmap[h].id &&
@@ -161,7 +159,8 @@
h = route4_hash_to(id);
restart:
- if ((b = head->table[h]) != NULL) {
+ b = head->table[h];
+ if (b) {
for (f = b->ht[route4_hash_from(id)]; f; f = f->next)
if (f->id == id)
ROUTE4_APPLY_RESULT();
@@ -197,8 +196,9 @@
static inline u32 to_hash(u32 id)
{
- u32 h = id&0xFF;
- if (id&0x8000)
+ u32 h = id & 0xFF;
+
+ if (id & 0x8000)
h += 256;
return h;
}
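
The to_hash()/from_hash() pair above encodes the route4 handle layout: bits 0-7 carry the "to" tag (bit 15 marks interface-classified entries, which use the upper 256 buckets), and the halfword at bits 16+ selects one of 16 FROM buckets, 16 IIF buckets, or the wildcard slot. A standalone restatement of the same arithmetic:

#include <stdint.h>
#include <stdio.h>

static uint32_t to_hash(uint32_t id)
{
	uint32_t h = id & 0xFF;

	if (id & 0x8000)
		h += 256;		/* iif-flagged handles: upper half */
	return h;
}

static uint32_t from_hash(uint32_t id)
{
	id &= 0xFFFF;
	if (id == 0xFFFF)
		return 32;		/* the wildcard slot */
	if (!(id & 0x8000)) {
		if (id > 255)
			return 256;	/* out of range; callers reject this */
		return id & 0xF;	/* one of 16 FROM buckets */
	}
	return 16 + (id & 0xF);		/* one of 16 IIF buckets */
}

int main(void)
{
	uint32_t handle = (3 << 16) | 0x42;	/* from tag 3, to tag 0x42 */

	printf("bucket %u, slot %u\n",
	       (unsigned)to_hash(handle), (unsigned)from_hash(handle >> 16));
	return 0;
}
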
@@ -211,17 +211,17 @@
if (!(id & 0x8000)) {
if (id > 255)
return 256;
- return id&0xF;
+ return id & 0xF;
}
- return 16 + (id&0xF);
+ return 16 + (id & 0xF);
}
static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
{
- struct route4_head *head = (struct route4_head*)tp->root;
+ struct route4_head *head = (struct route4_head *)tp->root;
struct route4_bucket *b;
struct route4_filter *f;
- unsigned h1, h2;
+ unsigned int h1, h2;
if (!head)
return 0;
@@ -230,11 +230,12 @@
if (h1 > 256)
return 0;
- h2 = from_hash(handle>>16);
+ h2 = from_hash(handle >> 16);
if (h2 > 32)
return 0;
- if ((b = head->table[h1]) != NULL) {
+ b = head->table[h1];
+ if (b) {
for (f = b->ht[h2]; f; f = f->next)
if (f->handle == handle)
return (unsigned long)f;
@@ -251,7 +252,7 @@
return 0;
}
-static inline void
+static void
route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f)
{
tcf_unbind_filter(tp, &f->res);
@@ -267,11 +268,12 @@
if (head == NULL)
return;
- for (h1=0; h1<=256; h1++) {
+ for (h1 = 0; h1 <= 256; h1++) {
struct route4_bucket *b;
- if ((b = head->table[h1]) != NULL) {
- for (h2=0; h2<=32; h2++) {
+ b = head->table[h1];
+ if (b) {
+ for (h2 = 0; h2 <= 32; h2++) {
struct route4_filter *f;
while ((f = b->ht[h2]) != NULL) {
@@ -287,9 +289,9 @@
static int route4_delete(struct tcf_proto *tp, unsigned long arg)
{
- struct route4_head *head = (struct route4_head*)tp->root;
- struct route4_filter **fp, *f = (struct route4_filter*)arg;
- unsigned h = 0;
+ struct route4_head *head = (struct route4_head *)tp->root;
+ struct route4_filter **fp, *f = (struct route4_filter *)arg;
+ unsigned int h = 0;
struct route4_bucket *b;
int i;
@@ -299,7 +301,7 @@
h = f->handle;
b = f->bkt;
- for (fp = &b->ht[from_hash(h>>16)]; *fp; fp = &(*fp)->next) {
+ for (fp = &b->ht[from_hash(h >> 16)]; *fp; fp = &(*fp)->next) {
if (*fp == f) {
tcf_tree_lock(tp);
*fp = f->next;
@@ -310,7 +312,7 @@
/* Strip tree */
- for (i=0; i<=32; i++)
+ for (i = 0; i <= 32; i++)
if (b->ht[i])
return 0;
@@ -380,7 +382,8 @@
}
h1 = to_hash(nhandle);
- if ((b = head->table[h1]) == NULL) {
+ b = head->table[h1];
+ if (!b) {
err = -ENOBUFS;
b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL);
if (b == NULL)
@@ -391,6 +394,7 @@
tcf_tree_unlock(tp);
} else {
unsigned int h2 = from_hash(nhandle >> 16);
+
err = -EEXIST;
for (fp = b->ht[h2]; fp; fp = fp->next)
if (fp->handle == f->handle)
@@ -444,7 +448,8 @@
if (err < 0)
return err;
- if ((f = (struct route4_filter*)*arg) != NULL) {
+ f = (struct route4_filter *)*arg;
+ if (f) {
if (f->handle != handle && handle)
return -EINVAL;
@@ -481,7 +486,7 @@
reinsert:
h = from_hash(f->handle >> 16);
- for (fp = &f->bkt->ht[h]; (f1=*fp) != NULL; fp = &f1->next)
+ for (fp = &f->bkt->ht[h]; (f1 = *fp) != NULL; fp = &f1->next)
if (f->handle < f1->handle)
break;
@@ -492,7 +497,8 @@
if (old_handle && f->handle != old_handle) {
th = to_hash(old_handle);
h = from_hash(old_handle >> 16);
- if ((b = head->table[th]) != NULL) {
+ b = head->table[th];
+ if (b) {
for (fp = &b->ht[h]; *fp; fp = &(*fp)->next) {
if (*fp == f) {
*fp = f->next;
@@ -515,7 +521,7 @@
static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
struct route4_head *head = tp->root;
- unsigned h, h1;
+ unsigned int h, h1;
if (head == NULL)
arg->stop = 1;
@@ -549,7 +555,7 @@
static int route4_dump(struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
- struct route4_filter *f = (struct route4_filter*)fh;
+ struct route4_filter *f = (struct route4_filter *)fh;
unsigned char *b = skb_tail_pointer(skb);
struct nlattr *nest;
u32 id;
@@ -563,15 +569,15 @@
if (nest == NULL)
goto nla_put_failure;
- if (!(f->handle&0x8000)) {
- id = f->id&0xFF;
+ if (!(f->handle & 0x8000)) {
+ id = f->id & 0xFF;
NLA_PUT_U32(skb, TCA_ROUTE4_TO, id);
}
- if (f->handle&0x80000000) {
- if ((f->handle>>16) != 0xFFFF)
+ if (f->handle & 0x80000000) {
+ if ((f->handle >> 16) != 0xFFFF)
NLA_PUT_U32(skb, TCA_ROUTE4_IIF, f->iif);
} else {
- id = f->id>>16;
+ id = f->id >> 16;
NLA_PUT_U32(skb, TCA_ROUTE4_FROM, id);
}
if (f->res.classid)
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 425a179..402c44b 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -66,28 +66,25 @@
powerful classification engine. */
-struct rsvp_head
-{
+struct rsvp_head {
u32 tmap[256/32];
u32 hgenerator;
u8 tgenerator;
struct rsvp_session *ht[256];
};
-struct rsvp_session
-{
+struct rsvp_session {
struct rsvp_session *next;
__be32 dst[RSVP_DST_LEN];
struct tc_rsvp_gpi dpi;
u8 protocol;
u8 tunnelid;
/* 16 (src,sport) hash slots, and one wildcard source slot */
- struct rsvp_filter *ht[16+1];
+ struct rsvp_filter *ht[16 + 1];
};
-struct rsvp_filter
-{
+struct rsvp_filter {
struct rsvp_filter *next;
__be32 src[RSVP_DST_LEN];
struct tc_rsvp_gpi spi;
@@ -100,17 +97,19 @@
struct rsvp_session *sess;
};
-static __inline__ unsigned hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
+static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
{
- unsigned h = (__force __u32)dst[RSVP_DST_LEN-1];
+ unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1];
+
h ^= h>>16;
h ^= h>>8;
return (h ^ protocol ^ tunnelid) & 0xFF;
}
-static __inline__ unsigned hash_src(__be32 *src)
+static inline unsigned int hash_src(__be32 *src)
{
- unsigned h = (__force __u32)src[RSVP_DST_LEN-1];
+	unsigned int h = (__force __u32)src[RSVP_DST_LEN - 1];
+
h ^= h>>16;
h ^= h>>8;
h ^= h>>4;
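
Both hash_dst() and hash_src() above use the same fold-down idiom: XOR-shifting mixes the high bits of the address word into the low byte before masking, so hosts differing only in their high bits still spread across the table. A runnable sketch:

#include <stdint.h>
#include <stdio.h>

static unsigned int fold8(uint32_t h)
{
	h ^= h >> 16;
	h ^= h >> 8;
	return h & 0xFF;
}

int main(void)
{
	/* two addresses differing only in high bits land in different slots */
	printf("%u %u\n", fold8(0x0A000001), fold8(0x0B000001));
	return 0;
}
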
@@ -134,10 +133,10 @@
static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
struct tcf_result *res)
{
- struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
+ struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
struct rsvp_session *s;
struct rsvp_filter *f;
- unsigned h1, h2;
+ unsigned int h1, h2;
__be32 *dst, *src;
u8 protocol;
u8 tunnelid = 0;
@@ -162,13 +161,13 @@
src = &nhptr->saddr.s6_addr32[0];
dst = &nhptr->daddr.s6_addr32[0];
protocol = nhptr->nexthdr;
- xprt = ((u8*)nhptr) + sizeof(struct ipv6hdr);
+ xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr);
#else
src = &nhptr->saddr;
dst = &nhptr->daddr;
protocol = nhptr->protocol;
- xprt = ((u8*)nhptr) + (nhptr->ihl<<2);
- if (nhptr->frag_off & htons(IP_MF|IP_OFFSET))
+	xprt = ((u8 *)nhptr) + (nhptr->ihl << 2);
+ if (nhptr->frag_off & htons(IP_MF | IP_OFFSET))
return -1;
#endif
@@ -176,10 +175,10 @@
h2 = hash_src(src);
for (s = sht[h1]; s; s = s->next) {
- if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
+		if (dst[RSVP_DST_LEN - 1] == s->dst[RSVP_DST_LEN - 1] &&
protocol == s->protocol &&
!(s->dpi.mask &
- (*(u32*)(xprt+s->dpi.offset)^s->dpi.key)) &&
+ (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) &&
#if RSVP_DST_LEN == 4
dst[0] == s->dst[0] &&
dst[1] == s->dst[1] &&
@@ -188,8 +187,8 @@
tunnelid == s->tunnelid) {
for (f = s->ht[h2]; f; f = f->next) {
- if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] &&
- !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key))
+				if (src[RSVP_DST_LEN - 1] == f->src[RSVP_DST_LEN - 1] &&
+ !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key))
#if RSVP_DST_LEN == 4
&&
src[0] == f->src[0] &&
@@ -205,7 +204,7 @@
return 0;
tunnelid = f->res.classid;
- nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr));
+ nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr));
goto restart;
}
}
@@ -224,11 +223,11 @@
static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
{
- struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
+ struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
struct rsvp_session *s;
struct rsvp_filter *f;
- unsigned h1 = handle&0xFF;
- unsigned h2 = (handle>>8)&0xFF;
+ unsigned int h1 = handle & 0xFF;
+ unsigned int h2 = (handle >> 8) & 0xFF;
if (h2 > 16)
return 0;
@@ -258,7 +257,7 @@
return -ENOBUFS;
}
-static inline void
+static void
rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
{
tcf_unbind_filter(tp, &f->res);
@@ -277,13 +276,13 @@
sht = data->ht;
- for (h1=0; h1<256; h1++) {
+ for (h1 = 0; h1 < 256; h1++) {
struct rsvp_session *s;
while ((s = sht[h1]) != NULL) {
sht[h1] = s->next;
- for (h2=0; h2<=16; h2++) {
+ for (h2 = 0; h2 <= 16; h2++) {
struct rsvp_filter *f;
while ((f = s->ht[h2]) != NULL) {
@@ -299,13 +298,13 @@
static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
{
- struct rsvp_filter **fp, *f = (struct rsvp_filter*)arg;
- unsigned h = f->handle;
+ struct rsvp_filter **fp, *f = (struct rsvp_filter *)arg;
+ unsigned int h = f->handle;
struct rsvp_session **sp;
struct rsvp_session *s = f->sess;
int i;
- for (fp = &s->ht[(h>>8)&0xFF]; *fp; fp = &(*fp)->next) {
+ for (fp = &s->ht[(h >> 8) & 0xFF]; *fp; fp = &(*fp)->next) {
if (*fp == f) {
tcf_tree_lock(tp);
*fp = f->next;
@@ -314,12 +313,12 @@
/* Strip tree */
- for (i=0; i<=16; i++)
+ for (i = 0; i <= 16; i++)
if (s->ht[i])
return 0;
/* OK, session has no flows */
- for (sp = &((struct rsvp_head*)tp->root)->ht[h&0xFF];
+ for (sp = &((struct rsvp_head *)tp->root)->ht[h & 0xFF];
*sp; sp = &(*sp)->next) {
if (*sp == s) {
tcf_tree_lock(tp);
@@ -337,13 +336,14 @@
return 0;
}
-static unsigned gen_handle(struct tcf_proto *tp, unsigned salt)
+static unsigned int gen_handle(struct tcf_proto *tp, unsigned int salt)
{
struct rsvp_head *data = tp->root;
int i = 0xFFFF;
while (i-- > 0) {
u32 h;
+
if ((data->hgenerator += 0x10000) == 0)
data->hgenerator = 0x10000;
h = data->hgenerator|salt;
@@ -355,10 +355,10 @@
static int tunnel_bts(struct rsvp_head *data)
{
- int n = data->tgenerator>>5;
- u32 b = 1<<(data->tgenerator&0x1F);
+ int n = data->tgenerator >> 5;
+ u32 b = 1 << (data->tgenerator & 0x1F);
- if (data->tmap[n]&b)
+ if (data->tmap[n] & b)
return 0;
data->tmap[n] |= b;
return 1;
@@ -372,10 +372,10 @@
memset(tmap, 0, sizeof(tmap));
- for (h1=0; h1<256; h1++) {
+ for (h1 = 0; h1 < 256; h1++) {
struct rsvp_session *s;
for (s = sht[h1]; s; s = s->next) {
- for (h2=0; h2<=16; h2++) {
+ for (h2 = 0; h2 <= 16; h2++) {
struct rsvp_filter *f;
for (f = s->ht[h2]; f; f = f->next) {
@@ -395,8 +395,8 @@
{
int i, k;
- for (k=0; k<2; k++) {
- for (i=255; i>0; i--) {
+ for (k = 0; k < 2; k++) {
+ for (i = 255; i > 0; i--) {
if (++data->tgenerator == 0)
data->tgenerator = 1;
if (tunnel_bts(data))
@@ -428,7 +428,7 @@
struct nlattr *opt = tca[TCA_OPTIONS-1];
struct nlattr *tb[TCA_RSVP_MAX + 1];
struct tcf_exts e;
- unsigned h1, h2;
+ unsigned int h1, h2;
__be32 *dst;
int err;
@@ -443,7 +443,8 @@
if (err < 0)
return err;
- if ((f = (struct rsvp_filter*)*arg) != NULL) {
+ f = (struct rsvp_filter *)*arg;
+ if (f) {
/* Node exists: adjust only classid */
if (f->handle != handle && handle)
@@ -500,7 +501,7 @@
goto errout;
}
- for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) {
+ for (sp = &data->ht[h1]; (s = *sp) != NULL; sp = &s->next) {
if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
pinfo && pinfo->protocol == s->protocol &&
memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
@@ -523,7 +524,7 @@
tcf_exts_change(tp, &f->exts, &e);
for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
- if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask)
+ if (((*fp)->spi.mask & f->spi.mask) != f->spi.mask)
break;
f->next = *fp;
wmb();
@@ -567,7 +568,7 @@
static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
struct rsvp_head *head = tp->root;
- unsigned h, h1;
+ unsigned int h, h1;
if (arg->stop)
return;
@@ -598,7 +599,7 @@
static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
- struct rsvp_filter *f = (struct rsvp_filter*)fh;
+ struct rsvp_filter *f = (struct rsvp_filter *)fh;
struct rsvp_session *s;
unsigned char *b = skb_tail_pointer(skb);
struct nlattr *nest;
@@ -624,7 +625,7 @@
NLA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
if (f->res.classid)
NLA_PUT_U32(skb, TCA_RSVP_CLASSID, f->res.classid);
- if (((f->handle>>8)&0xFF) != 16)
+ if (((f->handle >> 8) & 0xFF) != 16)
NLA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 20ef330..36667fa 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -249,7 +249,7 @@
* of the hashing index is below the threshold.
*/
if ((cp.mask >> cp.shift) < PERFECT_HASH_THRESHOLD)
- cp.hash = (cp.mask >> cp.shift)+1;
+ cp.hash = (cp.mask >> cp.shift) + 1;
else
cp.hash = DEFAULT_HASH_SIZE;
}
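
The sizing rule in the tcindex hunk above picks between a perfect table and a hash: when mask >> shift spans only a small range, a directly indexed table of (mask >> shift) + 1 slots covers every possible key with no collisions. A sketch (the threshold value used here is an assumption, not read from the driver):

#include <stdio.h>

#define PERFECT_HASH_THRESHOLD	64	/* assumed for illustration */
#define DEFAULT_HASH_SIZE	64	/* assumed for illustration */

static unsigned int table_size(unsigned int mask, unsigned int shift)
{
	if ((mask >> shift) < PERFECT_HASH_THRESHOLD)
		return (mask >> shift) + 1;	/* keys 0..mask>>shift inclusive */
	return DEFAULT_HASH_SIZE;		/* too wide: fall back to hashing */
}

int main(void)
{
	printf("%u\n", table_size(0x3F, 0));	/* 64 slots, perfect */
	printf("%u\n", table_size(0xFFFF, 0));	/* too wide: hashed */
	return 0;
}
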
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index b0c2a82..966920c 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -42,8 +42,7 @@
#include <net/act_api.h>
#include <net/pkt_cls.h>
-struct tc_u_knode
-{
+struct tc_u_knode {
struct tc_u_knode *next;
u32 handle;
struct tc_u_hnode *ht_up;
@@ -63,19 +62,17 @@
struct tc_u32_sel sel;
};
-struct tc_u_hnode
-{
+struct tc_u_hnode {
struct tc_u_hnode *next;
u32 handle;
u32 prio;
struct tc_u_common *tp_c;
int refcnt;
- unsigned divisor;
+ unsigned int divisor;
struct tc_u_knode *ht[1];
};
-struct tc_u_common
-{
+struct tc_u_common {
struct tc_u_hnode *hlist;
struct Qdisc *q;
int refcnt;
@@ -87,9 +84,11 @@
.police = TCA_U32_POLICE
};
-static __inline__ unsigned u32_hash_fold(__be32 key, struct tc_u32_sel *sel, u8 fshift)
+static inline unsigned int u32_hash_fold(__be32 key,
+ const struct tc_u32_sel *sel,
+ u8 fshift)
{
- unsigned h = ntohl(key & sel->hmask)>>fshift;
+ unsigned int h = ntohl(key & sel->hmask) >> fshift;
return h;
}
@@ -101,7 +100,7 @@
unsigned int off;
} stack[TC_U32_MAXDEPTH];
- struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root;
+ struct tc_u_hnode *ht = (struct tc_u_hnode *)tp->root;
unsigned int off = skb_network_offset(skb);
struct tc_u_knode *n;
int sdepth = 0;
@@ -120,7 +119,7 @@
struct tc_u32_key *key = n->sel.keys;
#ifdef CONFIG_CLS_U32_PERF
- n->pf->rcnt +=1;
+ n->pf->rcnt += 1;
j = 0;
#endif
@@ -133,7 +132,7 @@
}
#endif
- for (i = n->sel.nkeys; i>0; i--, key++) {
+ for (i = n->sel.nkeys; i > 0; i--, key++) {
int toff = off + key->off + (off2 & key->offmask);
__be32 *data, _data;
@@ -148,13 +147,13 @@
goto next_knode;
}
#ifdef CONFIG_CLS_U32_PERF
- n->pf->kcnts[j] +=1;
+ n->pf->kcnts[j] += 1;
j++;
#endif
}
if (n->ht_down == NULL) {
check_terminal:
- if (n->sel.flags&TC_U32_TERMINAL) {
+ if (n->sel.flags & TC_U32_TERMINAL) {
*res = n->res;
#ifdef CONFIG_NET_CLS_IND
@@ -164,7 +163,7 @@
}
#endif
#ifdef CONFIG_CLS_U32_PERF
- n->pf->rhit +=1;
+ n->pf->rhit += 1;
#endif
r = tcf_exts_exec(skb, &n->exts, res);
if (r < 0) {
@@ -197,10 +196,10 @@
sel = ht->divisor & u32_hash_fold(*data, &n->sel,
n->fshift);
}
- if (!(n->sel.flags&(TC_U32_VAROFFSET|TC_U32_OFFSET|TC_U32_EAT)))
+ if (!(n->sel.flags & (TC_U32_VAROFFSET | TC_U32_OFFSET | TC_U32_EAT)))
goto next_ht;
- if (n->sel.flags&(TC_U32_OFFSET|TC_U32_VAROFFSET)) {
+ if (n->sel.flags & (TC_U32_OFFSET | TC_U32_VAROFFSET)) {
off2 = n->sel.off + 3;
if (n->sel.flags & TC_U32_VAROFFSET) {
__be16 *data, _data;
@@ -215,7 +214,7 @@
}
off2 &= ~3;
}
- if (n->sel.flags&TC_U32_EAT) {
+ if (n->sel.flags & TC_U32_EAT) {
off += off2;
off2 = 0;
}
@@ -236,11 +235,11 @@
deadloop:
if (net_ratelimit())
- printk(KERN_WARNING "cls_u32: dead loop\n");
+ pr_warning("cls_u32: dead loop\n");
return -1;
}
-static __inline__ struct tc_u_hnode *
+static struct tc_u_hnode *
u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
{
struct tc_u_hnode *ht;
@@ -252,10 +251,10 @@
return ht;
}
-static __inline__ struct tc_u_knode *
+static struct tc_u_knode *
u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
{
- unsigned sel;
+ unsigned int sel;
struct tc_u_knode *n = NULL;
sel = TC_U32_HASH(handle);
@@ -300,7 +299,7 @@
do {
if (++tp_c->hgenerator == 0x7FF)
tp_c->hgenerator = 1;
- } while (--i>0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20));
+	} while (--i > 0 && u32_lookup_ht(tp_c, (tp_c->hgenerator | 0x800) << 20));
return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0;
}
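
For reference while reading the u32 hunks: a u32 handle packs a 12-bit hash-table id, an 8-bit bucket and a 12-bit node id, which is why gen_new_htid() above ORs in 0x800 and shifts by 20 when auto-allocating table ids. A standalone sketch using the same macro definitions as linux/pkt_cls.h:

#include <stdint.h>
#include <stdio.h>

#define TC_U32_HTID(h)	((h) & 0xFFF00000)
#define TC_U32_HASH(h)	(((h) >> 12) & 0xFF)
#define TC_U32_NODE(h)	((h) & 0xFFF)

int main(void)
{
	uint32_t handle = (0x801U << 20) | (0x05 << 12) | 0x00A;

	printf("htid %#x hash %u node %u\n",
	       (unsigned)TC_U32_HTID(handle),
	       (unsigned)TC_U32_HASH(handle),
	       (unsigned)TC_U32_NODE(handle));
	return 0;
}
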
@@ -378,9 +377,9 @@
static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
{
struct tc_u_knode *n;
- unsigned h;
+ unsigned int h;
- for (h=0; h<=ht->divisor; h++) {
+ for (h = 0; h <= ht->divisor; h++) {
while ((n = ht->ht[h]) != NULL) {
ht->ht[h] = n->next;
@@ -446,13 +445,13 @@
static int u32_delete(struct tcf_proto *tp, unsigned long arg)
{
- struct tc_u_hnode *ht = (struct tc_u_hnode*)arg;
+ struct tc_u_hnode *ht = (struct tc_u_hnode *)arg;
if (ht == NULL)
return 0;
if (TC_U32_KEY(ht->handle))
- return u32_delete_key(tp, (struct tc_u_knode*)ht);
+ return u32_delete_key(tp, (struct tc_u_knode *)ht);
if (tp->root == ht)
return -EINVAL;
@@ -470,14 +469,14 @@
static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
{
struct tc_u_knode *n;
- unsigned i = 0x7FF;
+ unsigned int i = 0x7FF;
- for (n=ht->ht[TC_U32_HASH(handle)]; n; n = n->next)
+ for (n = ht->ht[TC_U32_HASH(handle)]; n; n = n->next)
if (i < TC_U32_NODE(n->handle))
i = TC_U32_NODE(n->handle);
i++;
- return handle|(i>0xFFF ? 0xFFF : i);
+ return handle | (i > 0xFFF ? 0xFFF : i);
}
static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
@@ -566,7 +565,8 @@
if (err < 0)
return err;
- if ((n = (struct tc_u_knode*)*arg) != NULL) {
+ n = (struct tc_u_knode *)*arg;
+ if (n) {
if (TC_U32_KEY(n->handle) == 0)
return -EINVAL;
@@ -574,7 +574,7 @@
}
if (tb[TCA_U32_DIVISOR]) {
- unsigned divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);
+ unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);
if (--divisor > 0x100)
return -EINVAL;
@@ -585,7 +585,7 @@
if (handle == 0)
return -ENOMEM;
}
- ht = kzalloc(sizeof(*ht) + divisor*sizeof(void*), GFP_KERNEL);
+		ht = kzalloc(sizeof(*ht) + divisor * sizeof(void *), GFP_KERNEL);
if (ht == NULL)
return -ENOBUFS;
ht->tp_c = tp_c;
@@ -683,7 +683,7 @@
struct tc_u_common *tp_c = tp->data;
struct tc_u_hnode *ht;
struct tc_u_knode *n;
- unsigned h;
+ unsigned int h;
if (arg->stop)
return;
@@ -717,7 +717,7 @@
static int u32_dump(struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
- struct tc_u_knode *n = (struct tc_u_knode*)fh;
+ struct tc_u_knode *n = (struct tc_u_knode *)fh;
struct nlattr *nest;
if (n == NULL)
@@ -730,8 +730,9 @@
goto nla_put_failure;
if (TC_U32_KEY(n->handle) == 0) {
- struct tc_u_hnode *ht = (struct tc_u_hnode*)fh;
- u32 divisor = ht->divisor+1;
+ struct tc_u_hnode *ht = (struct tc_u_hnode *)fh;
+ u32 divisor = ht->divisor + 1;
+
NLA_PUT_U32(skb, TCA_U32_DIVISOR, divisor);
} else {
NLA_PUT(skb, TCA_U32_SEL,
@@ -755,7 +756,7 @@
goto nla_put_failure;
#ifdef CONFIG_NET_CLS_IND
- if(strlen(n->indev))
+ if (strlen(n->indev))
NLA_PUT_STRING(skb, TCA_U32_INDEV, n->indev);
#endif
#ifdef CONFIG_CLS_U32_PERF
diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c
index bc45039..1c8360a 100644
--- a/net/sched/em_cmp.c
+++ b/net/sched/em_cmp.c
@@ -33,40 +33,41 @@
return 0;
switch (cmp->align) {
- case TCF_EM_ALIGN_U8:
- val = *ptr;
- break;
+ case TCF_EM_ALIGN_U8:
+ val = *ptr;
+ break;
- case TCF_EM_ALIGN_U16:
- val = get_unaligned_be16(ptr);
+ case TCF_EM_ALIGN_U16:
+ val = get_unaligned_be16(ptr);
- if (cmp_needs_transformation(cmp))
- val = be16_to_cpu(val);
- break;
+ if (cmp_needs_transformation(cmp))
+ val = be16_to_cpu(val);
+ break;
- case TCF_EM_ALIGN_U32:
- /* Worth checking boundries? The branching seems
- * to get worse. Visit again. */
- val = get_unaligned_be32(ptr);
+ case TCF_EM_ALIGN_U32:
+		/* Worth checking boundaries? The branching seems
+ * to get worse. Visit again.
+ */
+ val = get_unaligned_be32(ptr);
- if (cmp_needs_transformation(cmp))
- val = be32_to_cpu(val);
- break;
+ if (cmp_needs_transformation(cmp))
+ val = be32_to_cpu(val);
+ break;
- default:
- return 0;
+ default:
+ return 0;
}
if (cmp->mask)
val &= cmp->mask;
switch (cmp->opnd) {
- case TCF_EM_OPND_EQ:
- return val == cmp->val;
- case TCF_EM_OPND_LT:
- return val < cmp->val;
- case TCF_EM_OPND_GT:
- return val > cmp->val;
+ case TCF_EM_OPND_EQ:
+ return val == cmp->val;
+ case TCF_EM_OPND_LT:
+ return val < cmp->val;
+ case TCF_EM_OPND_GT:
+ return val > cmp->val;
}
return 0;
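
The re-indented switch above is essentially the whole cmp ematch: load a value at the configured width, optionally byte-swap, mask, then apply one of three operands. The core reduces to a few lines; a runnable sketch with local names (not the kernel's):

#include <stdint.h>
#include <stdio.h>

enum { OPND_EQ, OPND_LT, OPND_GT };	/* mirrors TCF_EM_OPND_* */

static int cmp_match(uint32_t val, uint32_t mask, uint32_t ref, int opnd)
{
	if (mask)
		val &= mask;
	switch (opnd) {
	case OPND_EQ:
		return val == ref;
	case OPND_LT:
		return val < ref;
	case OPND_GT:
		return val > ref;
	}
	return 0;
}

int main(void)
{
	/* "low byte of the loaded value greater than 0x20" */
	printf("%d\n", cmp_match(0x1234, 0xFF, 0x20, OPND_GT));	/* 1 */
	return 0;
}
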
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 34da5e2..a889d09 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -73,21 +73,18 @@
#include <net/pkt_cls.h>
#include <net/sock.h>
-struct meta_obj
-{
+struct meta_obj {
unsigned long value;
unsigned int len;
};
-struct meta_value
-{
+struct meta_value {
struct tcf_meta_val hdr;
unsigned long val;
unsigned int len;
};
-struct meta_match
-{
+struct meta_match {
struct meta_value lvalue;
struct meta_value rvalue;
};
@@ -255,7 +252,7 @@
if (unlikely(skb_dst(skb) == NULL))
*err = -1;
else
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
dst->value = skb_dst(skb)->tclassid;
#else
dst->value = 0;
@@ -483,8 +480,7 @@
* Meta value collectors assignment table
**************************************************************************/
-struct meta_ops
-{
+struct meta_ops {
void (*get)(struct sk_buff *, struct tcf_pkt_info *,
struct meta_value *, struct meta_obj *, int *);
};
@@ -494,7 +490,7 @@
/* Meta value operations table listing all meta value collectors and
* assigns them to a type and meta id. */
-static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
+static struct meta_ops __meta_ops[TCF_META_TYPE_MAX + 1][TCF_META_ID_MAX + 1] = {
[TCF_META_TYPE_VAR] = {
[META_ID(DEV)] = META_FUNC(var_dev),
[META_ID(SK_BOUND_IF)] = META_FUNC(var_sk_bound_if),
@@ -550,7 +546,7 @@
}
};
-static inline struct meta_ops * meta_ops(struct meta_value *val)
+static inline struct meta_ops *meta_ops(struct meta_value *val)
{
return &__meta_ops[meta_type(val)][meta_id(val)];
}
@@ -649,9 +645,8 @@
{
if (v->len == sizeof(unsigned long))
NLA_PUT(skb, tlv, sizeof(unsigned long), &v->val);
- else if (v->len == sizeof(u32)) {
+ else if (v->len == sizeof(u32))
NLA_PUT_U32(skb, tlv, v->val);
- }
return 0;
@@ -663,8 +658,7 @@
* Type specific operations table
**************************************************************************/
-struct meta_type_ops
-{
+struct meta_type_ops {
void (*destroy)(struct meta_value *);
int (*compare)(struct meta_obj *, struct meta_obj *);
int (*change)(struct meta_value *, struct nlattr *);
@@ -672,7 +666,7 @@
int (*dump)(struct sk_buff *, struct meta_value *, int);
};
-static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX+1] = {
+static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX + 1] = {
[TCF_META_TYPE_VAR] = {
.destroy = meta_var_destroy,
.compare = meta_var_compare,
@@ -688,7 +682,7 @@
}
};
-static inline struct meta_type_ops * meta_type_ops(struct meta_value *v)
+static inline struct meta_type_ops *meta_type_ops(struct meta_value *v)
{
return &__meta_type_ops[meta_type(v)];
}
@@ -713,7 +707,7 @@
return err;
if (meta_type_ops(v)->apply_extras)
- meta_type_ops(v)->apply_extras(v, dst);
+ meta_type_ops(v)->apply_extras(v, dst);
return 0;
}
@@ -732,12 +726,12 @@
r = meta_type_ops(&meta->lvalue)->compare(&l_value, &r_value);
switch (meta->lvalue.hdr.op) {
- case TCF_EM_OPND_EQ:
- return !r;
- case TCF_EM_OPND_LT:
- return r < 0;
- case TCF_EM_OPND_GT:
- return r > 0;
+ case TCF_EM_OPND_EQ:
+ return !r;
+ case TCF_EM_OPND_LT:
+ return r < 0;
+ case TCF_EM_OPND_GT:
+ return r > 0;
}
return 0;
@@ -771,7 +765,7 @@
static inline int meta_is_supported(struct meta_value *val)
{
- return (!meta_id(val) || meta_ops(val)->get);
+ return !meta_id(val) || meta_ops(val)->get;
}
static const struct nla_policy meta_policy[TCA_EM_META_MAX + 1] = {
diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c
index 1a4176a..a3bed07 100644
--- a/net/sched/em_nbyte.c
+++ b/net/sched/em_nbyte.c
@@ -18,8 +18,7 @@
#include <linux/tc_ematch/tc_em_nbyte.h>
#include <net/pkt_cls.h>
-struct nbyte_data
-{
+struct nbyte_data {
struct tcf_em_nbyte hdr;
char pattern[0];
};
diff --git a/net/sched/em_text.c b/net/sched/em_text.c
index ea8f566..15d353d 100644
--- a/net/sched/em_text.c
+++ b/net/sched/em_text.c
@@ -19,8 +19,7 @@
#include <linux/tc_ematch/tc_em_text.h>
#include <net/pkt_cls.h>
-struct text_match
-{
+struct text_match {
u16 from_offset;
u16 to_offset;
u8 from_layer;
diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c
index 953f147..797bdb8 100644
--- a/net/sched/em_u32.c
+++ b/net/sched/em_u32.c
@@ -35,7 +35,7 @@
if (!tcf_valid_offset(skb, ptr, sizeof(u32)))
return 0;
- return !(((*(__be32*) ptr) ^ key->val) & key->mask);
+ return !(((*(__be32 *) ptr) ^ key->val) & key->mask);
}
static struct tcf_ematch_ops em_u32_ops = {
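
The expression in the em_u32 match above is the classic masked-compare idiom: XOR leaves set bits exactly where the word and the key differ, so ANDing with the mask and testing for zero matches only the bits the mask selects. Standalone:

#include <stdint.h>
#include <stdio.h>

static int u32_match(uint32_t word, uint32_t val, uint32_t mask)
{
	return !((word ^ val) & mask);
}

int main(void)
{
	/* compare only the top byte (e.g. an IP version/IHL octet) */
	printf("%d\n", u32_match(0x45000034, 0x45000000, 0xFF000000));	/* 1 */
	printf("%d\n", u32_match(0x60000000, 0x45000000, 0xFF000000));	/* 0 */
	return 0;
}
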
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 5e37da9..88d93eb 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -93,7 +93,7 @@
static LIST_HEAD(ematch_ops);
static DEFINE_RWLOCK(ematch_mod_lock);
-static inline struct tcf_ematch_ops * tcf_em_lookup(u16 kind)
+static struct tcf_ematch_ops *tcf_em_lookup(u16 kind)
{
struct tcf_ematch_ops *e = NULL;
@@ -163,8 +163,8 @@
}
EXPORT_SYMBOL(tcf_em_unregister);
-static inline struct tcf_ematch * tcf_em_get_match(struct tcf_ematch_tree *tree,
- int index)
+static inline struct tcf_ematch *tcf_em_get_match(struct tcf_ematch_tree *tree,
+ int index)
{
return &tree->matches[index];
}
@@ -184,7 +184,8 @@
if (em_hdr->kind == TCF_EM_CONTAINER) {
/* Special ematch called "container", carries an index
- * referencing an external ematch sequence. */
+ * referencing an external ematch sequence.
+ */
u32 ref;
if (data_len < sizeof(ref))
@@ -195,7 +196,8 @@
goto errout;
/* We do not allow backward jumps to avoid loops and jumps
- * to our own position are of course illegal. */
+ * to our own position are of course illegal.
+ */
if (ref <= idx)
goto errout;
@@ -208,7 +210,8 @@
* which automatically releases the reference again, therefore
* the module MUST not be given back under any circumstances
* here. Be aware, the destroy function assumes that the
- * module is held if the ops field is non zero. */
+ * module is held if the ops field is non zero.
+ */
em->ops = tcf_em_lookup(em_hdr->kind);
if (em->ops == NULL) {
@@ -221,7 +224,8 @@
if (em->ops) {
/* We dropped the RTNL mutex in order to
* perform the module load. Tell the caller
- * to replay the request. */
+ * to replay the request.
+ */
module_put(em->ops->owner);
err = -EAGAIN;
}
@@ -230,7 +234,8 @@
}
/* ematch module provides expected length of data, so we
- * can do a basic sanity check. */
+ * can do a basic sanity check.
+ */
if (em->ops->datalen && data_len < em->ops->datalen)
goto errout;
@@ -246,7 +251,8 @@
* TCF_EM_SIMPLE may be specified stating that the
* data only consists of a u32 integer and the module
* does not expect a memory reference but rather
- * the value carried. */
+ * the value carried.
+ */
if (em_hdr->flags & TCF_EM_SIMPLE) {
if (data_len < sizeof(u32))
goto errout;
@@ -334,7 +340,8 @@
* The array of rt attributes is parsed in the order as they are
* provided, their type must be incremental from 1 to n. Even
* if it does not serve any real purpose, a failure of sticking
- * to this policy will result in parsing failure. */
+ * to this policy will result in parsing failure.
+ */
for (idx = 0; nla_ok(rt_match, list_len); idx++) {
err = -EINVAL;
@@ -359,7 +366,8 @@
/* Check if the number of matches provided by userspace actually
* complies with the array of matches. The number was used for
* the validation of references and a mismatch could lead to
- * undefined references during the matching process. */
+ * undefined references during the matching process.
+ */
if (idx != tree_hdr->nmatches) {
err = -EINVAL;
goto errout_abort;
@@ -449,7 +457,7 @@
.flags = em->flags
};
- NLA_PUT(skb, i+1, sizeof(em_hdr), &em_hdr);
+ NLA_PUT(skb, i + 1, sizeof(em_hdr), &em_hdr);
if (em->ops && em->ops->dump) {
if (em->ops->dump(skb, em) < 0)
@@ -478,6 +486,7 @@
struct tcf_pkt_info *info)
{
int r = em->ops->match(skb, em, info);
+
return tcf_em_is_inverted(em) ? !r : r;
}
@@ -527,8 +536,8 @@
stack_overflow:
if (net_ratelimit())
- printk(KERN_WARNING "tc ematch: local stack overflow,"
- " increase NET_EMATCH_STACK\n");
+ pr_warning("tc ematch: local stack overflow,"
+ " increase NET_EMATCH_STACK\n");
return -1;
}
EXPORT_SYMBOL(__tcf_em_tree_match);
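
One detail worth restating from the ematch hunks above: every match result passes through tcf_em_is_inverted(), so a per-match flag can negate the module's verdict before the tree logic combines it. Trivial, but easy to miss:

#include <stdio.h>

static int apply_match(int raw_result, int inverted)
{
	return inverted ? !raw_result : raw_result;
}

int main(void)
{
	printf("%d %d\n", apply_match(1, 0), apply_match(1, 1));	/* 1 0 */
	return 0;
}
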
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index b22ca2d..1507415 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -187,7 +187,7 @@
int err = -ENOENT;
write_lock(&qdisc_mod_lock);
- for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next)
+ for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
if (q == qops)
break;
if (q) {
@@ -321,7 +321,9 @@
if (!tab || --tab->refcnt)
return;
- for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) {
+ for (rtabp = &qdisc_rtab_list;
+ (rtab = *rtabp) != NULL;
+ rtabp = &rtab->next) {
if (rtab == tab) {
*rtabp = rtab->next;
kfree(rtab);
@@ -396,6 +398,11 @@
return stab;
}
+static void stab_kfree_rcu(struct rcu_head *head)
+{
+ kfree(container_of(head, struct qdisc_size_table, rcu));
+}
+
void qdisc_put_stab(struct qdisc_size_table *tab)
{
if (!tab)
@@ -405,7 +412,7 @@
if (--tab->refcnt == 0) {
list_del(&tab->list);
- kfree(tab);
+ call_rcu_bh(&tab->rcu, stab_kfree_rcu);
}
spin_unlock(&qdisc_stab_lock);
@@ -428,7 +435,7 @@
return -1;
}
-void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab)
+void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab)
{
int pkt_len, slot;
@@ -454,14 +461,13 @@
pkt_len = 1;
qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
-EXPORT_SYMBOL(qdisc_calculate_pkt_len);
+EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc)
{
if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
- printk(KERN_WARNING
- "%s: %s qdisc %X: is non-work-conserving?\n",
- txt, qdisc->ops->id, qdisc->handle >> 16);
+ pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
+ txt, qdisc->ops->id, qdisc->handle >> 16);
qdisc->flags |= TCQ_F_WARN_NONWC;
}
}
@@ -472,7 +478,7 @@
struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
timer);
- wd->qdisc->flags &= ~TCQ_F_THROTTLED;
+ qdisc_unthrottled(wd->qdisc);
__netif_schedule(qdisc_root(wd->qdisc));
return HRTIMER_NORESTART;
@@ -494,7 +500,7 @@
&qdisc_root_sleeping(wd->qdisc)->state))
return;
- wd->qdisc->flags |= TCQ_F_THROTTLED;
+ qdisc_throttled(wd->qdisc);
time = ktime_set(0, 0);
time = ktime_add_ns(time, PSCHED_TICKS2NS(expires));
hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
@@ -504,7 +510,7 @@
void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
hrtimer_cancel(&wd->timer);
- wd->qdisc->flags &= ~TCQ_F_THROTTLED;
+ qdisc_unthrottled(wd->qdisc);
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
@@ -625,7 +631,7 @@
autohandle = TC_H_MAKE(0x80000000U, 0);
} while (qdisc_lookup(dev, autohandle) && --i > 0);
- return i>0 ? autohandle : 0;
+ return i > 0 ? autohandle : 0;
}
void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
@@ -834,7 +840,7 @@
err = PTR_ERR(stab);
goto err_out4;
}
- sch->stab = stab;
+ rcu_assign_pointer(sch->stab, stab);
}
if (tca[TCA_RATE]) {
spinlock_t *root_lock;
@@ -874,7 +880,7 @@
* Any broken qdiscs that would require a ops->reset() here?
* The qdisc was never in action so it shouldn't be necessary.
*/
- qdisc_put_stab(sch->stab);
+ qdisc_put_stab(rtnl_dereference(sch->stab));
if (ops->destroy)
ops->destroy(sch);
goto err_out3;
@@ -882,7 +888,7 @@
static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
{
- struct qdisc_size_table *stab = NULL;
+ struct qdisc_size_table *ostab, *stab = NULL;
int err = 0;
if (tca[TCA_OPTIONS]) {
@@ -899,8 +905,9 @@
return PTR_ERR(stab);
}
- qdisc_put_stab(sch->stab);
- sch->stab = stab;
+ ostab = rtnl_dereference(sch->stab);
+ rcu_assign_pointer(sch->stab, stab);
+ qdisc_put_stab(ostab);
if (tca[TCA_RATE]) {
/* NB: ignores errors from replace_estimator
@@ -915,9 +922,8 @@
return 0;
}
-struct check_loop_arg
-{
- struct qdisc_walker w;
+struct check_loop_arg {
+ struct qdisc_walker w;
struct Qdisc *p;
int depth;
};
@@ -970,7 +976,8 @@
struct Qdisc *p = NULL;
int err;
- if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
+ dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+ if (!dev)
return -ENODEV;
err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -980,12 +987,12 @@
if (clid) {
if (clid != TC_H_ROOT) {
if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
- if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
+ p = qdisc_lookup(dev, TC_H_MAJ(clid));
+ if (!p)
return -ENOENT;
q = qdisc_leaf(p, clid);
- } else { /* ingress */
- if (dev_ingress_queue(dev))
- q = dev_ingress_queue(dev)->qdisc_sleeping;
+ } else if (dev_ingress_queue(dev)) {
+ q = dev_ingress_queue(dev)->qdisc_sleeping;
}
} else {
q = dev->qdisc;
@@ -996,7 +1003,8 @@
if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
return -EINVAL;
} else {
- if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
+ q = qdisc_lookup(dev, tcm->tcm_handle);
+ if (!q)
return -ENOENT;
}
@@ -1008,7 +1016,8 @@
return -EINVAL;
if (q->handle == 0)
return -ENOENT;
- if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0)
+ err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
+ if (err != 0)
return err;
} else {
qdisc_notify(net, skb, n, clid, NULL, q);
@@ -1017,7 +1026,7 @@
}
/*
- Create/change qdisc.
+ * Create/change qdisc.
*/
static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
@@ -1036,7 +1045,8 @@
clid = tcm->tcm_parent;
q = p = NULL;
- if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
+ dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+ if (!dev)
return -ENODEV;
err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -1046,12 +1056,12 @@
if (clid) {
if (clid != TC_H_ROOT) {
if (clid != TC_H_INGRESS) {
- if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
+ p = qdisc_lookup(dev, TC_H_MAJ(clid));
+ if (!p)
return -ENOENT;
q = qdisc_leaf(p, clid);
- } else { /* ingress */
- if (dev_ingress_queue_create(dev))
- q = dev_ingress_queue(dev)->qdisc_sleeping;
+ } else if (dev_ingress_queue_create(dev)) {
+ q = dev_ingress_queue(dev)->qdisc_sleeping;
}
} else {
q = dev->qdisc;
@@ -1063,13 +1073,14 @@
if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
if (tcm->tcm_handle) {
- if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
+ if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
return -EEXIST;
if (TC_H_MIN(tcm->tcm_handle))
return -EINVAL;
- if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
+ q = qdisc_lookup(dev, tcm->tcm_handle);
+ if (!q)
goto create_n_graft;
- if (n->nlmsg_flags&NLM_F_EXCL)
+ if (n->nlmsg_flags & NLM_F_EXCL)
return -EEXIST;
if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
return -EINVAL;
@@ -1079,7 +1090,7 @@
atomic_inc(&q->refcnt);
goto graft;
} else {
- if (q == NULL)
+ if (!q)
goto create_n_graft;
/* This magic test requires explanation.
@@ -1101,9 +1112,9 @@
* For now we select create/graft, if
* user gave KIND, which does not match existing.
*/
- if ((n->nlmsg_flags&NLM_F_CREATE) &&
- (n->nlmsg_flags&NLM_F_REPLACE) &&
- ((n->nlmsg_flags&NLM_F_EXCL) ||
+ if ((n->nlmsg_flags & NLM_F_CREATE) &&
+ (n->nlmsg_flags & NLM_F_REPLACE) &&
+ ((n->nlmsg_flags & NLM_F_EXCL) ||
(tca[TCA_KIND] &&
nla_strcmp(tca[TCA_KIND], q->ops->id))))
goto create_n_graft;
@@ -1118,7 +1129,7 @@
/* Change qdisc parameters */
if (q == NULL)
return -ENOENT;
- if (n->nlmsg_flags&NLM_F_EXCL)
+ if (n->nlmsg_flags & NLM_F_EXCL)
return -EEXIST;
if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
return -EINVAL;
@@ -1128,7 +1139,7 @@
return err;
create_n_graft:
- if (!(n->nlmsg_flags&NLM_F_CREATE))
+ if (!(n->nlmsg_flags & NLM_F_CREATE))
return -ENOENT;
if (clid == TC_H_INGRESS) {
if (dev_ingress_queue(dev))
@@ -1175,6 +1186,7 @@
struct nlmsghdr *nlh;
unsigned char *b = skb_tail_pointer(skb);
struct gnet_dump d;
+ struct qdisc_size_table *stab;
nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
tcm = NLMSG_DATA(nlh);
@@ -1190,7 +1202,8 @@
goto nla_put_failure;
q->qstats.qlen = q->q.qlen;
- if (q->stab && qdisc_dump_stab(skb, q->stab) < 0)
+ stab = rtnl_dereference(q->stab);
+ if (stab && qdisc_dump_stab(skb, stab) < 0)
goto nla_put_failure;
if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
@@ -1234,16 +1247,19 @@
return -ENOBUFS;
if (old && !tc_qdisc_dump_ignore(old)) {
- if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
+ if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq,
+ 0, RTM_DELQDISC) < 0)
goto err_out;
}
if (new && !tc_qdisc_dump_ignore(new)) {
- if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
+ if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq,
+ old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
goto err_out;
}
if (skb->len)
- return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+ return rtnetlink_send(skb, net, pid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
err_out:
kfree_skb(skb);
@@ -1275,7 +1291,7 @@
q_idx++;
continue;
}
- if (!tc_qdisc_dump_ignore(q) &&
+ if (!tc_qdisc_dump_ignore(q) &&
tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
goto done;
@@ -1356,7 +1372,8 @@
u32 qid = TC_H_MAJ(clid);
int err;
- if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
+ dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+ if (!dev)
return -ENODEV;
err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -1391,9 +1408,9 @@
qid = dev->qdisc->handle;
/* Now qid is genuine qdisc handle consistent
- both with parent and child.
-
- TC_H_MAJ(pid) still may be unspecified, complete it now.
+ * both with parent and child.
+ *
+ * TC_H_MAJ(pid) still may be unspecified, complete it now.
*/
if (pid)
pid = TC_H_MAKE(qid, pid);
@@ -1403,7 +1420,8 @@
}
/* OK. Locate qdisc */
- if ((q = qdisc_lookup(dev, qid)) == NULL)
+ q = qdisc_lookup(dev, qid);
+ if (!q)
return -ENOENT;
/* And check that it supports classes */
@@ -1423,13 +1441,14 @@
if (cl == 0) {
err = -ENOENT;
- if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
+ if (n->nlmsg_type != RTM_NEWTCLASS ||
+ !(n->nlmsg_flags & NLM_F_CREATE))
goto out;
} else {
switch (n->nlmsg_type) {
case RTM_NEWTCLASS:
err = -EEXIST;
- if (n->nlmsg_flags&NLM_F_EXCL)
+ if (n->nlmsg_flags & NLM_F_EXCL)
goto out;
break;
case RTM_DELTCLASS:
@@ -1521,14 +1540,14 @@
return -EINVAL;
}
- return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+ return rtnetlink_send(skb, net, pid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
}
-struct qdisc_dump_args
-{
- struct qdisc_walker w;
- struct sk_buff *skb;
- struct netlink_callback *cb;
+struct qdisc_dump_args {
+ struct qdisc_walker w;
+ struct sk_buff *skb;
+ struct netlink_callback *cb;
};
static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
@@ -1590,7 +1609,7 @@
static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
+ struct tcmsg *tcm = (struct tcmsg *)NLMSG_DATA(cb->nlh);
struct net *net = sock_net(skb->sk);
struct netdev_queue *dev_queue;
struct net_device *dev;
@@ -1598,7 +1617,8 @@
if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
return 0;
- if ((dev = dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
+ dev = dev_get_by_index(net, tcm->tcm_ifindex);
+ if (!dev)
return 0;
s_t = cb->args[0];
@@ -1621,19 +1641,22 @@
}
/* Main classifier routine: scans classifier chain attached
- to this qdisc, (optionally) tests for protocol and asks
- specific classifiers.
+ * to this qdisc, (optionally) tests for protocol and asks
+ * specific classifiers.
*/
int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
struct tcf_result *res)
{
__be16 protocol = skb->protocol;
- int err = 0;
+ int err;
for (; tp; tp = tp->next) {
- if ((tp->protocol == protocol ||
- tp->protocol == htons(ETH_P_ALL)) &&
- (err = tp->classify(skb, tp, res)) >= 0) {
+ if (tp->protocol != protocol &&
+ tp->protocol != htons(ETH_P_ALL))
+ continue;
+ err = tp->classify(skb, tp, res);
+
+ if (err >= 0) {
#ifdef CONFIG_NET_CLS_ACT
if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
@@ -1664,11 +1687,11 @@
if (verd++ >= MAX_REC_LOOP) {
if (net_ratelimit())
- printk(KERN_NOTICE
- "%s: packet reclassify loop"
+ pr_notice("%s: packet reclassify loop"
" rule prio %u protocol %02x\n",
- tp->q->ops->id,
- tp->prio & 0xffff, ntohs(tp->protocol));
+ tp->q->ops->id,
+ tp->prio & 0xffff,
+ ntohs(tp->protocol));
return TC_ACT_SHOT;
}
skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
@@ -1761,7 +1784,7 @@
err = register_pernet_subsys(&psched_net_ops);
if (err) {
- printk(KERN_ERR "pktsched_init: "
+ pr_err("pktsched_init: "
"cannot initialize per netns operations\n");
return err;
}
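
The most substantive change in the sch_api.c hunks above is the conversion of sch->stab to RCU: qdisc_put_stab() now frees through call_rcu_bh(), and writers publish with rcu_assign_pointer() under RTNL while reading back via rtnl_dereference(), so the fast path can use the table without the qdisc lock. A condensed restatement of the write side (kernel context assumed, refcounting simplified; not a standalone program):

static void stab_kfree_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct qdisc_size_table, rcu));
}

/* qdisc_change(): publish the new table before retiring the old one,
 * so a reader always sees either the old or the new table, never a
 * freed one. */
ostab = rtnl_dereference(sch->stab);	/* writer side, RTNL held */
rcu_assign_pointer(sch->stab, stab);	/* readers switch over */
if (ostab && --ostab->refcnt == 0)	/* simplified: real code takes a lock */
	call_rcu_bh(&ostab->rcu, stab_kfree_rcu);
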
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 943d733..3f08158 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -319,7 +319,7 @@
* creation), and one for the reference held when calling delete.
*/
if (flow->ref < 2) {
- printk(KERN_ERR "atm_tc_delete: flow->ref == %d\n", flow->ref);
+ pr_err("atm_tc_delete: flow->ref == %d\n", flow->ref);
return -EINVAL;
}
if (flow->ref > 2)
@@ -384,12 +384,12 @@
}
}
flow = NULL;
- done:
- ;
+done:
+ ;
}
- if (!flow)
+ if (!flow) {
flow = &p->link;
- else {
+ } else {
if (flow->vcc)
ATM_SKB(skb)->atm_options = flow->vcc->atm_options;
/*@@@ looks good ... but it's not supposed to work :-) */
@@ -576,8 +576,7 @@
list_for_each_entry_safe(flow, tmp, &p->flows, list) {
if (flow->ref > 1)
- printk(KERN_ERR "atm_destroy: %p->ref = %d\n", flow,
- flow->ref);
+ pr_err("atm_destroy: %p->ref = %d\n", flow, flow->ref);
atm_tc_put(sch, (unsigned long)flow);
}
tasklet_kill(&p->task);
@@ -616,9 +615,8 @@
}
if (flow->excess)
NLA_PUT_U32(skb, TCA_ATM_EXCESS, flow->classid);
- else {
+ else
NLA_PUT_U32(skb, TCA_ATM_EXCESS, 0);
- }
nla_nest_end(skb, nest);
return skb->len;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 5f63ec5..24d94c0 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -72,8 +72,7 @@
struct cbq_sched_data;
-struct cbq_class
-{
+struct cbq_class {
struct Qdisc_class_common common;
struct cbq_class *next_alive; /* next class with backlog in this priority band */
@@ -139,19 +138,18 @@
int refcnt;
int filters;
- struct cbq_class *defaults[TC_PRIO_MAX+1];
+ struct cbq_class *defaults[TC_PRIO_MAX + 1];
};
-struct cbq_sched_data
-{
+struct cbq_sched_data {
struct Qdisc_class_hash clhash; /* Hash table of all classes */
- int nclasses[TC_CBQ_MAXPRIO+1];
- unsigned quanta[TC_CBQ_MAXPRIO+1];
+ int nclasses[TC_CBQ_MAXPRIO + 1];
+ unsigned int quanta[TC_CBQ_MAXPRIO + 1];
struct cbq_class link;
- unsigned activemask;
- struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
+ unsigned int activemask;
+ struct cbq_class *active[TC_CBQ_MAXPRIO + 1]; /* List of all classes
with backlog */
#ifdef CONFIG_NET_CLS_ACT
@@ -162,7 +160,7 @@
int tx_len;
psched_time_t now; /* Cached timestamp */
psched_time_t now_rt; /* Cached real time */
- unsigned pmask;
+ unsigned int pmask;
struct hrtimer delay_timer;
struct qdisc_watchdog watchdog; /* Watchdog timer,
@@ -175,9 +173,9 @@
};
-#define L2T(cl,len) qdisc_l2t((cl)->R_tab,len)
+#define L2T(cl, len) qdisc_l2t((cl)->R_tab, len)
-static __inline__ struct cbq_class *
+static inline struct cbq_class *
cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
{
struct Qdisc_class_common *clc;
@@ -193,25 +191,27 @@
static struct cbq_class *
cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
{
- struct cbq_class *cl, *new;
+ struct cbq_class *cl;
- for (cl = this->tparent; cl; cl = cl->tparent)
- if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
+ for (cl = this->tparent; cl; cl = cl->tparent) {
+ struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT];
+
+ if (new != NULL && new != this)
return new;
-
+ }
return NULL;
}
#endif
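
Since the classify steps below lean on TC_H_MAJ() comparisons (e.g. TC_H_MAJ(prio ^ sch->handle) == 0 in step 1), recall the handle arithmetic: a classid is a 16:16 major:minor pair, and XOR-ing two ids zeroes the major half exactly when they belong to the same qdisc. A standalone check using the pkt_sched.h macro definitions:

#include <stdint.h>
#include <stdio.h>

#define TC_H_MAJ(h)		((h) & 0xFFFF0000U)
#define TC_H_MIN(h)		((h) & 0x0000FFFFU)
#define TC_H_MAKE(maj, min)	(TC_H_MAJ(maj) | TC_H_MIN(min))

int main(void)
{
	uint32_t qh   = 0x00010000U;		/* qdisc handle 1: */
	uint32_t prio = TC_H_MAKE(qh, 5);	/* class 1:5 */

	printf("%d\n", TC_H_MAJ(prio ^ qh) == 0);		/* 1: ours */
	printf("%d\n", TC_H_MAJ(prio ^ 0x00020000U) == 0);	/* 0: not ours */
	return 0;
}
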
/* Classify packet. The procedure is pretty complicated, but
- it allows us to combine link sharing and priority scheduling
- transparently.
-
- Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
- so that it resolves to split nodes. Then packets are classified
- by logical priority, or a more specific classifier may be attached
- to the split node.
+ * it allows us to combine link sharing and priority scheduling
+ * transparently.
+ *
+ * Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
+ * so that it resolves to split nodes. Then packets are classified
+ * by logical priority, or a more specific classifier may be attached
+ * to the split node.
*/
static struct cbq_class *
@@ -227,7 +227,7 @@
/*
* Step 1. If skb->priority points to one of our classes, use it.
*/
- if (TC_H_MAJ(prio^sch->handle) == 0 &&
+ if (TC_H_MAJ(prio ^ sch->handle) == 0 &&
(cl = cbq_class_lookup(q, prio)) != NULL)
return cl;
@@ -243,10 +243,11 @@
(result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
goto fallback;
- if ((cl = (void*)res.class) == NULL) {
+ cl = (void *)res.class;
+ if (!cl) {
if (TC_H_MAJ(res.classid))
cl = cbq_class_lookup(q, res.classid);
- else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
+ else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
cl = defmap[TC_PRIO_BESTEFFORT];
if (cl == NULL || cl->level >= head->level)
@@ -282,7 +283,7 @@
* Step 4. No success...
*/
if (TC_H_MAJ(prio) == 0 &&
- !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
+ !(cl = head->defaults[prio & TC_PRIO_MAX]) &&
!(cl = head->defaults[TC_PRIO_BESTEFFORT]))
return head;
@@ -290,12 +291,12 @@
}
/*
- A packet has just been enqueued on the empty class.
- cbq_activate_class adds it to the tail of active class list
- of its priority band.
+ * A packet has just been enqueued on the empty class.
+ * cbq_activate_class adds it to the tail of active class list
+ * of its priority band.
*/
-static __inline__ void cbq_activate_class(struct cbq_class *cl)
+static inline void cbq_activate_class(struct cbq_class *cl)
{
struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
int prio = cl->cpriority;
@@ -314,9 +315,9 @@
}
/*
- Unlink class from active chain.
- Note that this same procedure is done directly in cbq_dequeue*
- during round-robin procedure.
+ * Unlink class from active chain.
+ * Note that this same procedure is done directly in cbq_dequeue*
+ * during round-robin procedure.
*/
static void cbq_deactivate_class(struct cbq_class *this)
@@ -350,7 +351,7 @@
{
int toplevel = q->toplevel;
- if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
+ if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) {
psched_time_t now;
psched_tdiff_t incr;
@@ -363,7 +364,7 @@
q->toplevel = cl->level;
return;
}
- } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
+ } while ((cl = cl->borrow) != NULL && toplevel > cl->level);
}
}
@@ -417,11 +418,11 @@
delay += cl->offtime;
/*
- Class goes to sleep, so that it will have no
- chance to work avgidle. Let's forgive it 8)
-
- BTW cbq-2.0 has a crap in this
- place, apparently they forgot to shift it by cl->ewma_log.
+ * Class goes to sleep, so that it will have no
+ * chance to work avgidle. Let's forgive it 8)
+ *
+ * BTW cbq-2.0 has a crap in this
+ * place, apparently they forgot to shift it by cl->ewma_log.
*/
if (cl->avgidle < 0)
delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
@@ -438,8 +439,8 @@
q->wd_expires = delay;
/* Dirty work! We must schedule wakeups based on
- real available rate, rather than leaf rate,
- which may be tiny (even zero).
+ * real available rate, rather than leaf rate,
+ * which may be tiny (even zero).
*/
if (q->toplevel == TC_CBQ_MAXLEVEL) {
struct cbq_class *b;
@@ -459,7 +460,7 @@
}
/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
- they go overlimit
+ * they go overlimit
*/
static void cbq_ovl_rclassic(struct cbq_class *cl)
@@ -594,7 +595,7 @@
struct Qdisc *sch = q->watchdog.qdisc;
psched_time_t now;
psched_tdiff_t delay = 0;
- unsigned pmask;
+ unsigned int pmask;
now = psched_get_time();
@@ -623,7 +624,7 @@
hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
}
- sch->flags &= ~TCQ_F_THROTTLED;
+ qdisc_unthrottled(sch);
__netif_schedule(qdisc_root(sch));
return HRTIMER_NORESTART;
}
@@ -663,15 +664,15 @@
#endif
/*
- It is mission critical procedure.
+ * It is a mission-critical procedure.
+ *
+ * We "regenerate" toplevel cutoff, if transmitting class
+ * has backlog and it is not regulated. It is not part of
+ * original CBQ description, but looks more reasonable.
+ * Probably, it is wrong. This question needs further investigation.
+ */
- We "regenerate" toplevel cutoff, if transmitting class
- has backlog and it is not regulated. It is not part of
- original CBQ description, but looks more reasonable.
- Probably, it is wrong. This question needs further investigation.
-*/
-
-static __inline__ void
+static inline void
cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
struct cbq_class *borrowed)
{
@@ -682,7 +683,7 @@
q->toplevel = borrowed->level;
return;
}
- } while ((borrowed=borrowed->borrow) != NULL);
+ } while ((borrowed = borrowed->borrow) != NULL);
}
#if 0
/* It is not necessary now. Uncommenting it
@@ -710,10 +711,10 @@
cl->bstats.bytes += len;
/*
- (now - last) is total time between packet right edges.
- (last_pktlen/rate) is "virtual" busy time, so that
-
- idle = (now - last) - last_pktlen/rate
+ * (now - last) is total time between packet right edges.
+ * (last_pktlen/rate) is "virtual" busy time, so that
+ *
+ * idle = (now - last) - last_pktlen/rate
*/
idle = q->now - cl->last;
@@ -723,9 +724,9 @@
idle -= L2T(cl, len);
/* true_avgidle := (1-W)*true_avgidle + W*idle,
- where W=2^{-ewma_log}. But cl->avgidle is scaled:
- cl->avgidle == true_avgidle/W,
- hence:
+ * where W=2^{-ewma_log}. But cl->avgidle is scaled:
+ * cl->avgidle == true_avgidle/W,
+ * hence:
*/
avgidle += idle - (avgidle>>cl->ewma_log);
}
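
Aside: the scaled EWMA in this hunk can be sanity-checked in isolation. With W = 2^-ewma_log the kernel stores cl->avgidle == true_avgidle/W, so one update step reduces to avgidle += idle - (avgidle >> ewma_log). A minimal standalone sketch with made-up idle samples (not kernel code; like the kernel, it relies on arithmetic right shift of negative values):

#include <stdio.h>

int main(void)
{
	long avgidle = 0;		/* scaled: true_avgidle / W */
	const int ewma_log = 5;		/* W = 2^-5 = 1/32 */
	long samples[] = { 100, -40, 60, -80 };
	int i;

	for (i = 0; i < 4; i++) {
		long idle = samples[i];

		/* true_avgidle := (1-W)*true_avgidle + W*idle, rescaled by 1/W */
		avgidle += idle - (avgidle >> ewma_log);
		printf("idle=%4ld  scaled avgidle=%ld\n", idle, avgidle);
	}
	return 0;
}
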
@@ -739,22 +740,22 @@
cl->avgidle = avgidle;
/* Calculate expected time, when this class
- will be allowed to send.
- It will occur, when:
- (1-W)*true_avgidle + W*delay = 0, i.e.
- idle = (1/W - 1)*(-true_avgidle)
- or
- idle = (1 - W)*(-cl->avgidle);
+ * will be allowed to send.
+ * It will occur, when:
+ * (1-W)*true_avgidle + W*delay = 0, i.e.
+ * idle = (1/W - 1)*(-true_avgidle)
+ * or
+ * idle = (1 - W)*(-cl->avgidle);
*/
idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
/*
- That is not all.
- To maintain the rate allocated to the class,
- we add to undertime virtual clock,
- necessary to complete transmitted packet.
- (len/phys_bandwidth has been already passed
- to the moment of cbq_update)
+ * That is not all.
+ * To maintain the rate allocated to the class,
+ * we add to undertime virtual clock,
+ * necessary to complete transmitted packet.
+ * (len/phys_bandwidth has been already passed
+ * to the moment of cbq_update)
*/
idle -= L2T(&q->link, len);
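
Restating the reflowed comment as one formula: with W = 2^{-\mathrm{ewma\_log}} and \bar{a} the true (unscaled) average, the class may send again once

    (1 - W)\,\bar{a} + W\,\mathrm{idle} = 0
    \;\Longrightarrow\;
    \mathrm{idle} = \Bigl(\tfrac{1}{W} - 1\Bigr)(-\bar{a}) = (1 - W)\,(-\mathrm{cl{\to}avgidle}),

since cl->avgidle = \bar{a}/W. The code's (-avgidle) - ((-avgidle) >> ewma_log) is exactly the (1 - W) form, and the idle -= L2T(&q->link, len) just above then charges the virtual transmission time of the packet that was just sent.
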
@@ -776,7 +777,7 @@
cbq_update_toplevel(q, this, q->tx_borrowed);
}
-static __inline__ struct cbq_class *
+static inline struct cbq_class *
cbq_under_limit(struct cbq_class *cl)
{
struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
@@ -792,16 +793,17 @@
do {
/* It is very suspicious place. Now overlimit
- action is generated for not bounded classes
- only if link is completely congested.
- Though it is in agree with ancestor-only paradigm,
- it looks very stupid. Particularly,
- it means that this chunk of code will either
- never be called or result in strong amplification
- of burstiness. Dangerous, silly, and, however,
- no another solution exists.
+ * action is generated for not bounded classes
+ * only if link is completely congested.
+ * Though it is in agreement with the ancestor-only paradigm,
+ * it looks very stupid. Particularly,
+ * it means that this chunk of code will either
+ * never be called or result in strong amplification
+ * of burstiness. Dangerous, silly, and, however,
+ * no other solution exists.
*/
- if ((cl = cl->borrow) == NULL) {
+ cl = cl->borrow;
+ if (!cl) {
this_cl->qstats.overlimits++;
this_cl->overlimit(this_cl);
return NULL;
@@ -814,7 +816,7 @@
return cl;
}
-static __inline__ struct sk_buff *
+static inline struct sk_buff *
cbq_dequeue_prio(struct Qdisc *sch, int prio)
{
struct cbq_sched_data *q = qdisc_priv(sch);
@@ -838,7 +840,7 @@
if (cl->deficit <= 0) {
/* Class exhausted its allotment per
- this round. Switch to the next one.
+ * this round. Switch to the next one.
*/
deficit = 1;
cl->deficit += cl->quantum;
@@ -848,8 +850,8 @@
skb = cl->q->dequeue(cl->q);
/* Class did not give us any skb :-(
- It could occur even if cl->q->q.qlen != 0
- f.e. if cl->q == "tbf"
+ * It could occur even if cl->q->q.qlen != 0
+ * f.e. if cl->q == "tbf"
*/
if (skb == NULL)
goto skip_class;
@@ -878,7 +880,7 @@
skip_class:
if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
/* Class is empty or penalized.
- Unlink it from active chain.
+ * Unlink it from active chain.
*/
cl_prev->next_alive = cl->next_alive;
cl->next_alive = NULL;
@@ -917,14 +919,14 @@
return NULL;
}
-static __inline__ struct sk_buff *
+static inline struct sk_buff *
cbq_dequeue_1(struct Qdisc *sch)
{
struct cbq_sched_data *q = qdisc_priv(sch);
struct sk_buff *skb;
- unsigned activemask;
+ unsigned int activemask;
- activemask = q->activemask&0xFF;
+ activemask = q->activemask & 0xFF;
while (activemask) {
int prio = ffz(~activemask);
activemask &= ~(1<<prio);
@@ -949,11 +951,11 @@
if (q->tx_class) {
psched_tdiff_t incr2;
/* Time integrator. We calculate EOS time
- by adding expected packet transmission time.
- If real time is greater, we warp artificial clock,
- so that:
-
- cbq_time = max(real_time, work);
+ * by adding expected packet transmission time.
+ * If real time is greater, we warp artificial clock,
+ * so that:
+ *
+ * cbq_time = max(real_time, work);
*/
incr2 = L2T(&q->link, q->tx_len);
q->now += incr2;
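
The time-integrator comment boils down to a max() update of the artificial clock. A toy restatement (sketch only; psched_time_t is wider in the kernel, and the increments come from psched_get_time()):

static long cbq_clock_step(long now, long real_now, long tx_time)
{
	now += tx_time;			/* expected end-of-send time */
	if (real_now > now)		/* real time overtook us: warp */
		now = real_now;
	return now;			/* == max(real_time, work) */
}
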
@@ -971,27 +973,27 @@
if (skb) {
qdisc_bstats_update(sch, skb);
sch->q.qlen--;
- sch->flags &= ~TCQ_F_THROTTLED;
+ qdisc_unthrottled(sch);
return skb;
}
/* All the classes are overlimit.
-
- It is possible, if:
-
- 1. Scheduler is empty.
- 2. Toplevel cutoff inhibited borrowing.
- 3. Root class is overlimit.
-
- Reset 2d and 3d conditions and retry.
-
- Note, that NS and cbq-2.0 are buggy, peeking
- an arbitrary class is appropriate for ancestor-only
- sharing, but not for toplevel algorithm.
-
- Our version is better, but slower, because it requires
- two passes, but it is unavoidable with top-level sharing.
- */
+ *
+ * It is possible, if:
+ *
+ * 1. Scheduler is empty.
+ * 2. Toplevel cutoff inhibited borrowing.
+ * 3. Root class is overlimit.
+ *
+ * Reset the 2nd and 3rd conditions and retry.
+ *
+ * Note, that NS and cbq-2.0 are buggy, peeking
+ * an arbitrary class is appropriate for ancestor-only
+ * sharing, but not for toplevel algorithm.
+ *
+ * Our version is better, but slower, because it requires
+ * two passes, but it is unavoidable with top-level sharing.
+ */
if (q->toplevel == TC_CBQ_MAXLEVEL &&
q->link.undertime == PSCHED_PASTPERFECT)
@@ -1002,7 +1004,8 @@
}
/* No packets in scheduler or nobody wants to give them to us :-(
- Sigh... start watchdog timer in the last case. */
+ * Sigh... start watchdog timer in the last case.
+ */
if (sch->q.qlen) {
sch->qstats.overlimits++;
@@ -1024,13 +1027,14 @@
int level = 0;
struct cbq_class *cl;
- if ((cl = this->children) != NULL) {
+ cl = this->children;
+ if (cl) {
do {
if (cl->level > level)
level = cl->level;
} while ((cl = cl->sibling) != this->children);
}
- this->level = level+1;
+ this->level = level + 1;
} while ((this = this->tparent) != NULL);
}
@@ -1046,14 +1050,15 @@
for (h = 0; h < q->clhash.hashsize; h++) {
hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
/* BUGGGG... Beware! This expression suffer of
- arithmetic overflows!
+ * arithmetic overflows!
*/
if (cl->priority == prio) {
cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
q->quanta[prio];
}
if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
- printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
+ pr_warning("CBQ: class %08x has bad quantum==%ld, repaired.\n",
+ cl->common.classid, cl->quantum);
cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
}
}
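
For reference, the quantum repaired above comes from quantum = weight * allot * nclasses[prio] / quanta[prio], where quanta[prio] is the sum of the weights in the band; with equal weights it collapses to the allot. A toy check with invented numbers:

#include <stdio.h>

int main(void)
{
	/* two classes of weight 1 in the band: quanta = 2, nclasses = 2 */
	long weight = 1, allot = 1514, nclasses = 2, quanta = 2;

	printf("quantum = %ld\n", weight * allot * nclasses / quanta);
	/* prints 1514: equal weights reduce the quantum to the allot */
	return 0;
}

The "BUGGGG" note is about the numerator: the product weight*allot*nclasses is evaluated before the division, so large weights or allotments can overflow before the result is scaled back down.
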
@@ -1064,18 +1069,18 @@
{
struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
struct cbq_class *split = cl->split;
- unsigned h;
+ unsigned int h;
int i;
if (split == NULL)
return;
- for (i=0; i<=TC_PRIO_MAX; i++) {
- if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
+ for (i = 0; i <= TC_PRIO_MAX; i++) {
+ if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
split->defaults[i] = NULL;
}
- for (i=0; i<=TC_PRIO_MAX; i++) {
+ for (i = 0; i <= TC_PRIO_MAX; i++) {
int level = split->level;
if (split->defaults[i])
@@ -1088,7 +1093,7 @@
hlist_for_each_entry(c, n, &q->clhash.hash[h],
common.hnode) {
if (c->split == split && c->level < level &&
- c->defmap&(1<<i)) {
+ c->defmap & (1<<i)) {
split->defaults[i] = c;
level = c->level;
}
@@ -1102,7 +1107,8 @@
struct cbq_class *split = NULL;
if (splitid == 0) {
- if ((split = cl->split) == NULL)
+ split = cl->split;
+ if (!split)
return;
splitid = split->common.classid;
}
@@ -1120,9 +1126,9 @@
cl->defmap = 0;
cbq_sync_defmap(cl);
cl->split = split;
- cl->defmap = def&mask;
+ cl->defmap = def & mask;
} else
- cl->defmap = (cl->defmap&~mask)|(def&mask);
+ cl->defmap = (cl->defmap & ~mask) | (def & mask);
cbq_sync_defmap(cl);
}
@@ -1135,7 +1141,7 @@
qdisc_class_hash_remove(&q->clhash, &this->common);
if (this->tparent) {
- clp=&this->sibling;
+ clp = &this->sibling;
cl = *clp;
do {
if (cl == this) {
@@ -1174,7 +1180,7 @@
}
}
-static unsigned int cbq_drop(struct Qdisc* sch)
+static unsigned int cbq_drop(struct Qdisc *sch)
{
struct cbq_sched_data *q = qdisc_priv(sch);
struct cbq_class *cl, *cl_head;
@@ -1182,7 +1188,8 @@
unsigned int len;
for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
- if ((cl_head = q->active[prio]) == NULL)
+ cl_head = q->active[prio];
+ if (!cl_head)
continue;
cl = cl_head;
@@ -1199,13 +1206,13 @@
}
static void
-cbq_reset(struct Qdisc* sch)
+cbq_reset(struct Qdisc *sch)
{
struct cbq_sched_data *q = qdisc_priv(sch);
struct cbq_class *cl;
struct hlist_node *n;
int prio;
- unsigned h;
+ unsigned int h;
q->activemask = 0;
q->pmask = 0;
@@ -1237,21 +1244,21 @@
static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
{
- if (lss->change&TCF_CBQ_LSS_FLAGS) {
- cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
- cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
+ if (lss->change & TCF_CBQ_LSS_FLAGS) {
+ cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
+ cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
}
- if (lss->change&TCF_CBQ_LSS_EWMA)
+ if (lss->change & TCF_CBQ_LSS_EWMA)
cl->ewma_log = lss->ewma_log;
- if (lss->change&TCF_CBQ_LSS_AVPKT)
+ if (lss->change & TCF_CBQ_LSS_AVPKT)
cl->avpkt = lss->avpkt;
- if (lss->change&TCF_CBQ_LSS_MINIDLE)
+ if (lss->change & TCF_CBQ_LSS_MINIDLE)
cl->minidle = -(long)lss->minidle;
- if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
+ if (lss->change & TCF_CBQ_LSS_MAXIDLE) {
cl->maxidle = lss->maxidle;
cl->avgidle = lss->maxidle;
}
- if (lss->change&TCF_CBQ_LSS_OFFTIME)
+ if (lss->change & TCF_CBQ_LSS_OFFTIME)
cl->offtime = lss->offtime;
return 0;
}
@@ -1279,10 +1286,10 @@
if (wrr->weight)
cl->weight = wrr->weight;
if (wrr->priority) {
- cl->priority = wrr->priority-1;
+ cl->priority = wrr->priority - 1;
cl->cpriority = cl->priority;
if (cl->priority >= cl->priority2)
- cl->priority2 = TC_CBQ_MAXPRIO-1;
+ cl->priority2 = TC_CBQ_MAXPRIO - 1;
}
cbq_addprio(q, cl);
@@ -1299,10 +1306,10 @@
cl->overlimit = cbq_ovl_delay;
break;
case TC_CBQ_OVL_LOWPRIO:
- if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
- ovl->priority2-1 <= cl->priority)
+ if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO ||
+ ovl->priority2 - 1 <= cl->priority)
return -EINVAL;
- cl->priority2 = ovl->priority2-1;
+ cl->priority2 = ovl->priority2 - 1;
cl->overlimit = cbq_ovl_lowprio;
break;
case TC_CBQ_OVL_DROP:
@@ -1381,9 +1388,9 @@
if (!q->link.q)
q->link.q = &noop_qdisc;
- q->link.priority = TC_CBQ_MAXPRIO-1;
- q->link.priority2 = TC_CBQ_MAXPRIO-1;
- q->link.cpriority = TC_CBQ_MAXPRIO-1;
+ q->link.priority = TC_CBQ_MAXPRIO - 1;
+ q->link.priority2 = TC_CBQ_MAXPRIO - 1;
+ q->link.cpriority = TC_CBQ_MAXPRIO - 1;
q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
q->link.overlimit = cbq_ovl_classic;
q->link.allot = psched_mtu(qdisc_dev(sch));
@@ -1414,7 +1421,7 @@
return err;
}
-static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
{
unsigned char *b = skb_tail_pointer(skb);
@@ -1426,7 +1433,7 @@
return -1;
}
-static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
{
unsigned char *b = skb_tail_pointer(skb);
struct tc_cbq_lssopt opt;
@@ -1451,15 +1458,15 @@
return -1;
}
-static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
{
unsigned char *b = skb_tail_pointer(skb);
struct tc_cbq_wrropt opt;
opt.flags = 0;
opt.allot = cl->allot;
- opt.priority = cl->priority+1;
- opt.cpriority = cl->cpriority+1;
+ opt.priority = cl->priority + 1;
+ opt.cpriority = cl->cpriority + 1;
opt.weight = cl->weight;
NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
return skb->len;
@@ -1469,13 +1476,13 @@
return -1;
}
-static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
{
unsigned char *b = skb_tail_pointer(skb);
struct tc_cbq_ovl opt;
opt.strategy = cl->ovl_strategy;
- opt.priority2 = cl->priority2+1;
+ opt.priority2 = cl->priority2 + 1;
opt.pad = 0;
opt.penalty = cl->penalty;
NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
@@ -1486,7 +1493,7 @@
return -1;
}
-static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
{
unsigned char *b = skb_tail_pointer(skb);
struct tc_cbq_fopt opt;
@@ -1505,7 +1512,7 @@
}
#ifdef CONFIG_NET_CLS_ACT
-static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
{
unsigned char *b = skb_tail_pointer(skb);
struct tc_cbq_police opt;
@@ -1569,7 +1576,7 @@
cbq_dump_class(struct Qdisc *sch, unsigned long arg,
struct sk_buff *skb, struct tcmsg *tcm)
{
- struct cbq_class *cl = (struct cbq_class*)arg;
+ struct cbq_class *cl = (struct cbq_class *)arg;
struct nlattr *nest;
if (cl->tparent)
@@ -1597,7 +1604,7 @@
struct gnet_dump *d)
{
struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *cl = (struct cbq_class*)arg;
+ struct cbq_class *cl = (struct cbq_class *)arg;
cl->qstats.qlen = cl->q->q.qlen;
cl->xstats.avgidle = cl->avgidle;
@@ -1617,7 +1624,7 @@
static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
struct Qdisc **old)
{
- struct cbq_class *cl = (struct cbq_class*)arg;
+ struct cbq_class *cl = (struct cbq_class *)arg;
if (new == NULL) {
new = qdisc_create_dflt(sch->dev_queue,
@@ -1640,10 +1647,9 @@
return 0;
}
-static struct Qdisc *
-cbq_leaf(struct Qdisc *sch, unsigned long arg)
+static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg)
{
- struct cbq_class *cl = (struct cbq_class*)arg;
+ struct cbq_class *cl = (struct cbq_class *)arg;
return cl->q;
}
@@ -1682,13 +1688,12 @@
kfree(cl);
}
-static void
-cbq_destroy(struct Qdisc* sch)
+static void cbq_destroy(struct Qdisc *sch)
{
struct cbq_sched_data *q = qdisc_priv(sch);
struct hlist_node *n, *next;
struct cbq_class *cl;
- unsigned h;
+ unsigned int h;
#ifdef CONFIG_NET_CLS_ACT
q->rx_class = NULL;
@@ -1712,7 +1717,7 @@
static void cbq_put(struct Qdisc *sch, unsigned long arg)
{
- struct cbq_class *cl = (struct cbq_class*)arg;
+ struct cbq_class *cl = (struct cbq_class *)arg;
if (--cl->refcnt == 0) {
#ifdef CONFIG_NET_CLS_ACT
@@ -1735,7 +1740,7 @@
{
int err;
struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *cl = (struct cbq_class*)*arg;
+ struct cbq_class *cl = (struct cbq_class *)*arg;
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_CBQ_MAX + 1];
struct cbq_class *parent;
@@ -1827,13 +1832,14 @@
if (classid) {
err = -EINVAL;
- if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
+ if (TC_H_MAJ(classid ^ sch->handle) ||
+ cbq_class_lookup(q, classid))
goto failure;
} else {
int i;
- classid = TC_H_MAKE(sch->handle,0x8000);
+ classid = TC_H_MAKE(sch->handle, 0x8000);
- for (i=0; i<0x8000; i++) {
+ for (i = 0; i < 0x8000; i++) {
if (++q->hgenerator >= 0x8000)
q->hgenerator = 1;
if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
@@ -1890,11 +1896,11 @@
cl->minidle = -0x7FFFFFFF;
cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
- if (cl->ewma_log==0)
+ if (cl->ewma_log == 0)
cl->ewma_log = q->link.ewma_log;
- if (cl->maxidle==0)
+ if (cl->maxidle == 0)
cl->maxidle = q->link.maxidle;
- if (cl->avpkt==0)
+ if (cl->avpkt == 0)
cl->avpkt = q->link.avpkt;
cl->overlimit = cbq_ovl_classic;
if (tb[TCA_CBQ_OVL_STRATEGY])
@@ -1920,7 +1926,7 @@
static int cbq_delete(struct Qdisc *sch, unsigned long arg)
{
struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *cl = (struct cbq_class*)arg;
+ struct cbq_class *cl = (struct cbq_class *)arg;
unsigned int qlen;
if (cl->filters || cl->children || cl == &q->link)
@@ -1978,7 +1984,7 @@
u32 classid)
{
struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *p = (struct cbq_class*)parent;
+ struct cbq_class *p = (struct cbq_class *)parent;
struct cbq_class *cl = cbq_class_lookup(q, classid);
if (cl) {
@@ -1992,7 +1998,7 @@
static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
- struct cbq_class *cl = (struct cbq_class*)arg;
+ struct cbq_class *cl = (struct cbq_class *)arg;
cl->filters--;
}
@@ -2002,7 +2008,7 @@
struct cbq_sched_data *q = qdisc_priv(sch);
struct cbq_class *cl;
struct hlist_node *n;
- unsigned h;
+ unsigned int h;
if (arg->stop)
return;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 0f7bf3f..2c79020 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -137,10 +137,10 @@
mask = nla_get_u8(tb[TCA_DSMARK_MASK]);
if (tb[TCA_DSMARK_VALUE])
- p->value[*arg-1] = nla_get_u8(tb[TCA_DSMARK_VALUE]);
+ p->value[*arg - 1] = nla_get_u8(tb[TCA_DSMARK_VALUE]);
if (tb[TCA_DSMARK_MASK])
- p->mask[*arg-1] = mask;
+ p->mask[*arg - 1] = mask;
err = 0;
@@ -155,8 +155,8 @@
if (!dsmark_valid_index(p, arg))
return -EINVAL;
- p->mask[arg-1] = 0xff;
- p->value[arg-1] = 0;
+ p->mask[arg - 1] = 0xff;
+ p->value[arg - 1] = 0;
return 0;
}
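
The (mask, value) pair reset in this hunk rewrites the DS byte as ds = (ds & mask) | value, so mask 0xff with value 0 is the identity mapping. A standalone illustration with hypothetical inputs (not the in-kernel helper):

#include <stdio.h>

static unsigned char remark(unsigned char ds, unsigned char mask,
			    unsigned char value)
{
	return (ds & mask) | value;
}

int main(void)
{
	/* keep the two ECN bits (mask 0x03), write DSCP EF (46 << 2) */
	printf("0x%02x\n", remark(0x01, 0x03, 46 << 2));	/* 0xb9 */
	return 0;
}
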
@@ -175,7 +175,7 @@
if (p->mask[i] == 0xff && !p->value[i])
goto ignore;
if (walker->count >= walker->skip) {
- if (walker->fn(sch, i+1, walker) < 0) {
+ if (walker->fn(sch, i + 1, walker) < 0) {
walker->stop = 1;
break;
}
@@ -304,9 +304,8 @@
* and don't need yet another qdisc as a bypass.
*/
if (p->mask[index] != 0xff || p->value[index])
- printk(KERN_WARNING
- "dsmark_dequeue: unsupported protocol %d\n",
- ntohs(skb->protocol));
+ pr_warning("dsmark_dequeue: unsupported protocol %d\n",
+ ntohs(skb->protocol));
break;
}
@@ -424,14 +423,14 @@
if (!dsmark_valid_index(p, cl))
return -EINVAL;
- tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl-1);
+ tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl - 1);
tcm->tcm_info = p->q->handle;
opts = nla_nest_start(skb, TCA_OPTIONS);
if (opts == NULL)
goto nla_put_failure;
- NLA_PUT_U8(skb, TCA_DSMARK_MASK, p->mask[cl-1]);
- NLA_PUT_U8(skb, TCA_DSMARK_VALUE, p->value[cl-1]);
+ NLA_PUT_U8(skb, TCA_DSMARK_MASK, p->mask[cl - 1]);
+ NLA_PUT_U8(skb, TCA_DSMARK_VALUE, p->value[cl - 1]);
return nla_nest_end(skb, opts);
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index d468b47..be33f9d 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -19,12 +19,11 @@
/* 1 band FIFO pseudo-"scheduler" */
-struct fifo_sched_data
-{
+struct fifo_sched_data {
u32 limit;
};
-static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct fifo_sched_data *q = qdisc_priv(sch);
@@ -34,7 +33,7 @@
return qdisc_reshape_fail(skb, sch);
}
-static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct fifo_sched_data *q = qdisc_priv(sch);
@@ -44,7 +43,7 @@
return qdisc_reshape_fail(skb, sch);
}
-static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct fifo_sched_data *q = qdisc_priv(sch);
@@ -62,11 +61,13 @@
static int fifo_init(struct Qdisc *sch, struct nlattr *opt)
{
struct fifo_sched_data *q = qdisc_priv(sch);
+ bool bypass;
+ bool is_bfifo = sch->ops == &bfifo_qdisc_ops;
if (opt == NULL) {
u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1;
- if (sch->ops == &bfifo_qdisc_ops)
+ if (is_bfifo)
limit *= psched_mtu(qdisc_dev(sch));
q->limit = limit;
@@ -79,6 +80,15 @@
q->limit = ctl->limit;
}
+ if (is_bfifo)
+ bypass = q->limit >= psched_mtu(qdisc_dev(sch));
+ else
+ bypass = q->limit >= 1;
+
+ if (bypass)
+ sch->flags |= TCQ_F_CAN_BYPASS;
+ else
+ sch->flags &= ~TCQ_F_CAN_BYPASS;
return 0;
}
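
In words: the fifo may only be bypassed when a single arriving packet can never exceed its limit -- one full-MTU packet for the byte-limited bfifo, one packet for pfifo. A condensed restatement of the test (sketch mirroring the hunk above):

#include <stdbool.h>

static bool fifo_can_bypass(bool is_bfifo, unsigned int limit,
			    unsigned int mtu)
{
	/* bfifo limits bytes, pfifo limits packets */
	return is_bfifo ? limit >= mtu : limit >= 1;
}
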
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 34dc598..0da09d5 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -87,8 +87,8 @@
*/
kfree_skb(skb);
if (net_ratelimit())
- printk(KERN_WARNING "Dead loop on netdevice %s, "
- "fix it urgently!\n", dev_queue->dev->name);
+ pr_warning("Dead loop on netdevice %s, fix it urgently!\n",
+ dev_queue->dev->name);
ret = qdisc_qlen(q);
} else {
/*
@@ -137,8 +137,8 @@
} else {
/* Driver returned NETDEV_TX_BUSY - requeue skb */
if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))
- printk(KERN_WARNING "BUG %s code %d qlen %d\n",
- dev->name, ret, q->q.qlen);
+ pr_warning("BUG %s code %d qlen %d\n",
+ dev->name, ret, q->q.qlen);
ret = dev_requeue_skb(skb, q);
}
@@ -412,8 +412,9 @@
};
-static const u8 prio2band[TC_PRIO_MAX+1] =
- { 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 };
+static const u8 prio2band[TC_PRIO_MAX + 1] = {
+ 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1
+};
/* 3-band FIFO queue: old style, but should be a bit faster than
generic prio+fifo combination.
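
The reflowed table above is the whole classifier for pfifo_fast: the band is simply prio2band[skb->priority & TC_PRIO_MAX], as the enqueue path below shows. A toy lookup (TC_PRIO_MAX is 15; priority 6 is TC_PRIO_INTERACTIVE):

#include <stdio.h>

static const unsigned char prio2band[16] = {
	1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1
};

int main(void)
{
	unsigned int priority = 6;	/* TC_PRIO_INTERACTIVE */

	printf("band %u\n", prio2band[priority & 15]);	/* band 0 */
	return 0;
}
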
@@ -445,7 +446,7 @@
return priv->q + band;
}
-static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
+static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc)
{
if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) {
int band = prio2band[skb->priority & TC_PRIO_MAX];
@@ -460,7 +461,7 @@
return qdisc_drop(skb, qdisc);
}
-static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
+static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
{
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
int band = bitmap2band[priv->bitmap];
@@ -479,7 +480,7 @@
return NULL;
}
-static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc)
+static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc)
{
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
int band = bitmap2band[priv->bitmap];
@@ -493,7 +494,7 @@
return NULL;
}
-static void pfifo_fast_reset(struct Qdisc* qdisc)
+static void pfifo_fast_reset(struct Qdisc *qdisc)
{
int prio;
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
@@ -510,7 +511,7 @@
{
struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };
- memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1);
+ memcpy(&opt.priomap, prio2band, TC_PRIO_MAX + 1);
NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
return skb->len;
@@ -526,6 +527,8 @@
for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
skb_queue_head_init(band2list(priv, prio));
+ /* Can by-pass the queue discipline */
+ qdisc->flags |= TCQ_F_CAN_BYPASS;
return 0;
}
@@ -540,6 +543,7 @@
.dump = pfifo_fast_dump,
.owner = THIS_MODULE,
};
+EXPORT_SYMBOL(pfifo_fast_ops);
struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
struct Qdisc_ops *ops)
@@ -630,7 +634,7 @@
#ifdef CONFIG_NET_SCHED
qdisc_list_del(qdisc);
- qdisc_put_stab(qdisc->stab);
+ qdisc_put_stab(rtnl_dereference(qdisc->stab));
#endif
gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
if (ops->reset)
@@ -674,25 +678,21 @@
return oqdisc;
}
+EXPORT_SYMBOL(dev_graft_qdisc);
static void attach_one_default_qdisc(struct net_device *dev,
struct netdev_queue *dev_queue,
void *_unused)
{
- struct Qdisc *qdisc;
+ struct Qdisc *qdisc = &noqueue_qdisc;
if (dev->tx_queue_len) {
qdisc = qdisc_create_dflt(dev_queue,
&pfifo_fast_ops, TC_H_ROOT);
if (!qdisc) {
- printk(KERN_INFO "%s: activation failed\n", dev->name);
+ netdev_info(dev, "activation failed\n");
return;
}
-
- /* Can by-pass the queue discipline for default qdisc */
- qdisc->flags |= TCQ_F_CAN_BYPASS;
- } else {
- qdisc = &noqueue_qdisc;
}
dev_queue->qdisc_sleeping = qdisc;
}
@@ -761,6 +761,7 @@
dev_watchdog_up(dev);
}
}
+EXPORT_SYMBOL(dev_activate);
static void dev_deactivate_queue(struct net_device *dev,
struct netdev_queue *dev_queue,
@@ -840,6 +841,7 @@
list_add(&dev->unreg_list, &single);
dev_deactivate_many(&single);
}
+EXPORT_SYMBOL(dev_deactivate);
static void dev_init_scheduler_queue(struct net_device *dev,
struct netdev_queue *dev_queue,
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 51dcc2a..b9493a0 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -32,8 +32,7 @@
struct gred_sched_data;
struct gred_sched;
-struct gred_sched_data
-{
+struct gred_sched_data {
u32 limit; /* HARD maximal queue length */
u32 DP; /* the drop pramaters */
u32 bytesin; /* bytes seen on virtualQ so far*/
@@ -50,8 +49,7 @@
GRED_RIO_MODE,
};
-struct gred_sched
-{
+struct gred_sched {
struct gred_sched_data *tab[MAX_DPs];
unsigned long flags;
u32 red_flags;
@@ -150,17 +148,18 @@
return t->red_flags & TC_RED_HARDDROP;
}
-static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
- struct gred_sched_data *q=NULL;
- struct gred_sched *t= qdisc_priv(sch);
+ struct gred_sched_data *q = NULL;
+ struct gred_sched *t = qdisc_priv(sch);
unsigned long qavg = 0;
u16 dp = tc_index_to_dp(skb);
- if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
+ if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
dp = t->def;
- if ((q = t->tab[dp]) == NULL) {
+ q = t->tab[dp];
+ if (!q) {
/* Pass through packets not assigned to a DP
* if no default DP has been configured. This
* allows for DP flows to be left untouched.
@@ -183,7 +182,7 @@
for (i = 0; i < t->DPs; i++) {
if (t->tab[i] && t->tab[i]->prio < q->prio &&
!red_is_idling(&t->tab[i]->parms))
- qavg +=t->tab[i]->parms.qavg;
+ qavg += t->tab[i]->parms.qavg;
}
}
@@ -203,28 +202,28 @@
gred_store_wred_set(t, q);
switch (red_action(&q->parms, q->parms.qavg + qavg)) {
- case RED_DONT_MARK:
- break;
+ case RED_DONT_MARK:
+ break;
- case RED_PROB_MARK:
- sch->qstats.overlimits++;
- if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) {
- q->stats.prob_drop++;
- goto congestion_drop;
- }
+ case RED_PROB_MARK:
+ sch->qstats.overlimits++;
+ if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) {
+ q->stats.prob_drop++;
+ goto congestion_drop;
+ }
- q->stats.prob_mark++;
- break;
+ q->stats.prob_mark++;
+ break;
- case RED_HARD_MARK:
- sch->qstats.overlimits++;
- if (gred_use_harddrop(t) || !gred_use_ecn(t) ||
- !INET_ECN_set_ce(skb)) {
- q->stats.forced_drop++;
- goto congestion_drop;
- }
- q->stats.forced_mark++;
- break;
+ case RED_HARD_MARK:
+ sch->qstats.overlimits++;
+ if (gred_use_harddrop(t) || !gred_use_ecn(t) ||
+ !INET_ECN_set_ce(skb)) {
+ q->stats.forced_drop++;
+ goto congestion_drop;
+ }
+ q->stats.forced_mark++;
+ break;
}
if (q->backlog + qdisc_pkt_len(skb) <= q->limit) {
@@ -241,7 +240,7 @@
return NET_XMIT_CN;
}
-static struct sk_buff *gred_dequeue(struct Qdisc* sch)
+static struct sk_buff *gred_dequeue(struct Qdisc *sch)
{
struct sk_buff *skb;
struct gred_sched *t = qdisc_priv(sch);
@@ -254,9 +253,9 @@
if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
if (net_ratelimit())
- printk(KERN_WARNING "GRED: Unable to relocate "
- "VQ 0x%x after dequeue, screwing up "
- "backlog.\n", tc_index_to_dp(skb));
+ pr_warning("GRED: Unable to relocate VQ 0x%x "
+ "after dequeue, screwing up "
+ "backlog.\n", tc_index_to_dp(skb));
} else {
q->backlog -= qdisc_pkt_len(skb);
@@ -273,7 +272,7 @@
return NULL;
}
-static unsigned int gred_drop(struct Qdisc* sch)
+static unsigned int gred_drop(struct Qdisc *sch)
{
struct sk_buff *skb;
struct gred_sched *t = qdisc_priv(sch);
@@ -286,9 +285,9 @@
if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
if (net_ratelimit())
- printk(KERN_WARNING "GRED: Unable to relocate "
- "VQ 0x%x while dropping, screwing up "
- "backlog.\n", tc_index_to_dp(skb));
+ pr_warning("GRED: Unable to relocate VQ 0x%x "
+ "while dropping, screwing up "
+ "backlog.\n", tc_index_to_dp(skb));
} else {
q->backlog -= len;
q->stats.other++;
@@ -308,7 +307,7 @@
}
-static void gred_reset(struct Qdisc* sch)
+static void gred_reset(struct Qdisc *sch)
{
int i;
struct gred_sched *t = qdisc_priv(sch);
@@ -369,8 +368,8 @@
for (i = table->DPs; i < MAX_DPs; i++) {
if (table->tab[i]) {
- printk(KERN_WARNING "GRED: Warning: Destroying "
- "shadowed VQ 0x%x\n", i);
+ pr_warning("GRED: Warning: Destroying "
+ "shadowed VQ 0x%x\n", i);
gred_destroy_vq(table->tab[i]);
table->tab[i] = NULL;
}
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 14a799d..6488e64 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -81,8 +81,7 @@
* that are expensive on 32-bit architectures.
*/
-struct internal_sc
-{
+struct internal_sc {
u64 sm1; /* scaled slope of the 1st segment */
u64 ism1; /* scaled inverse-slope of the 1st segment */
u64 dx; /* the x-projection of the 1st segment */
@@ -92,8 +91,7 @@
};
/* runtime service curve */
-struct runtime_sc
-{
+struct runtime_sc {
u64 x; /* current starting position on x-axis */
u64 y; /* current starting position on y-axis */
u64 sm1; /* scaled slope of the 1st segment */
@@ -104,15 +102,13 @@
u64 ism2; /* scaled inverse-slope of the 2nd segment */
};
-enum hfsc_class_flags
-{
+enum hfsc_class_flags {
HFSC_RSC = 0x1,
HFSC_FSC = 0x2,
HFSC_USC = 0x4
};
-struct hfsc_class
-{
+struct hfsc_class {
struct Qdisc_class_common cl_common;
unsigned int refcnt; /* usage count */
@@ -140,8 +136,8 @@
u64 cl_cumul; /* cumulative work in bytes done by
real-time criteria */
- u64 cl_d; /* deadline*/
- u64 cl_e; /* eligible time */
+ u64 cl_d; /* deadline*/
+ u64 cl_e; /* eligible time */
u64 cl_vt; /* virtual time */
u64 cl_f; /* time when this class will fit for
link-sharing, max(myf, cfmin) */
@@ -176,8 +172,7 @@
unsigned long cl_nactive; /* number of active children */
};
-struct hfsc_sched
-{
+struct hfsc_sched {
u16 defcls; /* default class id */
struct hfsc_class root; /* root class */
struct Qdisc_class_hash clhash; /* class hash */
@@ -693,7 +688,7 @@
if (go_active) {
n = rb_last(&cl->cl_parent->vt_tree);
if (n != NULL) {
- max_cl = rb_entry(n, struct hfsc_class,vt_node);
+ max_cl = rb_entry(n, struct hfsc_class, vt_node);
/*
* set vt to the average of the min and max
* classes. if the parent's period didn't
@@ -1177,8 +1172,10 @@
return NULL;
}
#endif
- if ((cl = (struct hfsc_class *)res.class) == NULL) {
- if ((cl = hfsc_find_class(res.classid, sch)) == NULL)
+ cl = (struct hfsc_class *)res.class;
+ if (!cl) {
+ cl = hfsc_find_class(res.classid, sch);
+ if (!cl)
break; /* filter selected invalid classid */
if (cl->level >= head->level)
break; /* filter may only point downwards */
@@ -1316,7 +1313,7 @@
return -1;
}
-static inline int
+static int
hfsc_dump_curves(struct sk_buff *skb, struct hfsc_class *cl)
{
if ((cl->cl_flags & HFSC_RSC) &&
@@ -1420,7 +1417,8 @@
struct hfsc_class *cl;
u64 next_time = 0;
- if ((cl = eltree_get_minel(q)) != NULL)
+ cl = eltree_get_minel(q);
+ if (cl)
next_time = cl->cl_e;
if (q->root.cl_cfmin != 0) {
if (next_time == 0 || next_time > q->root.cl_cfmin)
@@ -1625,7 +1623,8 @@
* find the class with the minimum deadline among
* the eligible classes.
*/
- if ((cl = eltree_get_mindl(q, cur_time)) != NULL) {
+ cl = eltree_get_mindl(q, cur_time);
+ if (cl) {
realtime = 1;
} else {
/*
@@ -1664,7 +1663,7 @@
set_passive(cl);
}
- sch->flags &= ~TCQ_F_THROTTLED;
+ qdisc_unthrottled(sch);
qdisc_bstats_update(sch, skb);
sch->q.qlen--;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index fc12fe6..e1429a8 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -99,9 +99,10 @@
struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */
struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */
/* When class changes from state 1->2 and disconnects from
- parent's feed then we lost ptr value and start from the
- first child again. Here we store classid of the
- last valid ptr (used when ptr is NULL). */
+ * parent's feed then we lose the ptr value and start from the
+ * first child again. Here we store classid of the
+ * last valid ptr (used when ptr is NULL).
+ */
u32 last_ptr_id[TC_HTB_NUMPRIO];
} inner;
} un;
@@ -185,7 +186,7 @@
* have no valid leaf we try to use MAJOR:default leaf. It still unsuccessfull
* then finish and return direct queue.
*/
-#define HTB_DIRECT (struct htb_class*)-1
+#define HTB_DIRECT ((struct htb_class *)-1L)
static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
int *qerr)
@@ -197,11 +198,13 @@
int result;
/* allow to select class by setting skb->priority to valid classid;
- note that nfmark can be used too by attaching filter fw with no
- rules in it */
+ * note that nfmark can be used too by attaching filter fw with no
+ * rules in it
+ */
if (skb->priority == sch->handle)
return HTB_DIRECT; /* X:0 (direct flow) selected */
- if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0)
+ cl = htb_find(skb->priority, sch);
+ if (cl && cl->level == 0)
return cl;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
@@ -216,10 +219,12 @@
return NULL;
}
#endif
- if ((cl = (void *)res.class) == NULL) {
+ cl = (void *)res.class;
+ if (!cl) {
if (res.classid == sch->handle)
return HTB_DIRECT; /* X:0 (direct flow) */
- if ((cl = htb_find(res.classid, sch)) == NULL)
+ cl = htb_find(res.classid, sch);
+ if (!cl)
break; /* filter selected invalid classid */
}
if (!cl->level)
@@ -378,7 +383,8 @@
if (p->un.inner.feed[prio].rb_node)
/* parent already has its feed in use so that
- reset bit in mask as parent is already ok */
+ * reset bit in mask as parent is already ok
+ */
mask &= ~(1 << prio);
htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
@@ -413,8 +419,9 @@
if (p->un.inner.ptr[prio] == cl->node + prio) {
/* we are removing child which is pointed to from
- parent feed - forget the pointer but remember
- classid */
+ * parent feed - forget the pointer but remember
+ * classid
+ */
p->un.inner.last_ptr_id[prio] = cl->common.classid;
p->un.inner.ptr[prio] = NULL;
}
@@ -663,8 +670,9 @@
unsigned long start)
{
/* don't run for longer than 2 jiffies; 2 is used instead of
- 1 to simplify things when jiffy is going to be incremented
- too soon */
+ * 1 to simplify things when jiffy is going to be incremented
+ * too soon
+ */
unsigned long stop_at = start + 2;
while (time_before(jiffies, stop_at)) {
struct htb_class *cl;
@@ -687,7 +695,7 @@
/* too much load - let's continue after a break for scheduling */
if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
- printk(KERN_WARNING "htb: too many events!\n");
+ pr_warning("htb: too many events!\n");
q->warned |= HTB_WARN_TOOMANYEVENTS;
}
@@ -695,7 +703,8 @@
}
/* Returns class->node+prio from id-tree where classe's id is >= id. NULL
- is no such one exists. */
+ * if no such one exists.
+ */
static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
u32 id)
{
@@ -739,12 +748,14 @@
for (i = 0; i < 65535; i++) {
if (!*sp->pptr && *sp->pid) {
/* ptr was invalidated but id is valid - try to recover
- the original or next ptr */
+ * the original or next ptr
+ */
*sp->pptr =
htb_id_find_next_upper(prio, sp->root, *sp->pid);
}
*sp->pid = 0; /* ptr is valid now so that remove this hint as it
- can become out of date quickly */
+ * can become out of date quickly
+ */
if (!*sp->pptr) { /* we are at right end; rewind & go up */
*sp->pptr = sp->root;
while ((*sp->pptr)->rb_left)
@@ -772,7 +783,8 @@
}
/* dequeues packet at given priority and level; call only if
- you are sure that there is active class at prio/level */
+ * you are sure that there is active class at prio/level
+ */
static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
int level)
{
@@ -789,9 +801,10 @@
return NULL;
/* class can be empty - it is unlikely but can be true if leaf
- qdisc drops packets in enqueue routine or if someone used
- graft operation on the leaf since last dequeue;
- simply deactivate and skip such class */
+ * qdisc drops packets in enqueue routine or if someone used
+ * graft operation on the leaf since last dequeue;
+ * simply deactivate and skip such class
+ */
if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
struct htb_class *next;
htb_deactivate(q, cl);
@@ -831,7 +844,8 @@
ptr[0]) + prio);
}
/* this used to be after charge_class but this constelation
- gives us slightly better performance */
+ * gives us slightly better performance
+ */
if (!cl->un.leaf.q->q.qlen)
htb_deactivate(q, cl);
htb_charge_class(q, cl, level, skb);
@@ -852,7 +866,7 @@
if (skb != NULL) {
ok:
qdisc_bstats_update(sch, skb);
- sch->flags &= ~TCQ_F_THROTTLED;
+ qdisc_unthrottled(sch);
sch->q.qlen--;
return skb;
}
@@ -883,6 +897,7 @@
m = ~q->row_mask[level];
while (m != (int)(-1)) {
int prio = ffz(m);
+
m |= 1 << prio;
skb = htb_dequeue_tree(q, prio, level);
if (likely(skb != NULL))
@@ -987,13 +1002,12 @@
return err;
if (tb[TCA_HTB_INIT] == NULL) {
- printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
+ pr_err("HTB: hey probably you have bad tc tool ?\n");
return -EINVAL;
}
gopt = nla_data(tb[TCA_HTB_INIT]);
if (gopt->version != HTB_VER >> 16) {
- printk(KERN_ERR
- "HTB: need tc/htb version %d (minor is %d), you have %d\n",
+ pr_err("HTB: need tc/htb version %d (minor is %d), you have %d\n",
HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
return -EINVAL;
}
@@ -1206,9 +1220,10 @@
cancel_work_sync(&q->work);
qdisc_watchdog_cancel(&q->watchdog);
/* This line used to be after htb_destroy_class call below
- and surprisingly it worked in 2.4. But it must precede it
- because filter need its target class alive to be able to call
- unbind_filter on it (without Oops). */
+ * and surprisingly it worked in 2.4. But it must precede it
+ * because filters need their target class alive to be able to call
+ * unbind_filter on it (without Oops).
+ */
tcf_destroy_chain(&q->filter_list);
for (i = 0; i < q->clhash.hashsize; i++) {
@@ -1342,11 +1357,12 @@
/* check maximal depth */
if (parent && parent->parent && parent->parent->level < 2) {
- printk(KERN_ERR "htb: tree is too deep\n");
+ pr_err("htb: tree is too deep\n");
goto failure;
}
err = -ENOBUFS;
- if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
+ cl = kzalloc(sizeof(*cl), GFP_KERNEL);
+ if (!cl)
goto failure;
err = gen_new_estimator(&cl->bstats, &cl->rate_est,
@@ -1366,8 +1382,9 @@
RB_CLEAR_NODE(&cl->node[prio]);
/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
- so that can't be used inside of sch_tree_lock
- -- thanks to Karlis Peisenieks */
+ * so that can't be used inside of sch_tree_lock
+ * -- thanks to Karlis Peisenieks
+ */
new_q = qdisc_create_dflt(sch->dev_queue,
&pfifo_qdisc_ops, classid);
sch_tree_lock(sch);
@@ -1419,17 +1436,18 @@
}
/* it used to be a nasty bug here, we have to check that node
- is really leaf before changing cl->un.leaf ! */
+ * is really leaf before changing cl->un.leaf !
+ */
if (!cl->level) {
cl->quantum = rtab->rate.rate / q->rate2quantum;
if (!hopt->quantum && cl->quantum < 1000) {
- printk(KERN_WARNING
+ pr_warning(
"HTB: quantum of class %X is small. Consider r2q change.\n",
cl->common.classid);
cl->quantum = 1000;
}
if (!hopt->quantum && cl->quantum > 200000) {
- printk(KERN_WARNING
+ pr_warning(
"HTB: quantum of class %X is big. Consider r2q change.\n",
cl->common.classid);
cl->quantum = 200000;
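
A worked example of the sizing check above, with an invented rate (rtab->rate.rate is in bytes per second; rate2quantum, the r2q parameter, defaults to 10):

#include <stdio.h>

int main(void)
{
	unsigned int rate = 125000;	/* 1 Mbit/s in bytes/sec */
	unsigned int r2q = 10;		/* rate2quantum default */

	/* 12500 bytes: inside [1000, 200000], so neither warning fires */
	printf("quantum = %u bytes\n", rate / r2q);
	return 0;
}
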
@@ -1478,13 +1496,13 @@
struct htb_class *cl = htb_find(classid, sch);
/*if (cl && !cl->level) return 0;
- The line above used to be there to prevent attaching filters to
- leaves. But at least tc_index filter uses this just to get class
- for other reasons so that we have to allow for it.
- ----
- 19.6.2002 As Werner explained it is ok - bind filter is just
- another way to "lock" the class - unlike "get" this lock can
- be broken by class during destroy IIUC.
+ * The line above used to be there to prevent attaching filters to
+ * leaves. But at least tc_index filter uses this just to get class
+ * for other reasons so that we have to allow for it.
+ * ----
+ * 19.6.2002 As Werner explained it is ok - bind filter is just
+ * another way to "lock" the class - unlike "get" this lock can
+ * be broken by class during destroy IIUC.
*/
if (cl)
cl->filter_cnt++;
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index ecc302f..ec5cbc8 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -61,7 +61,6 @@
TC_H_MIN(ntx + 1)));
if (qdisc == NULL)
goto err;
- qdisc->flags |= TCQ_F_CAN_BYPASS;
priv->qdiscs[ntx] = qdisc;
}
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
new file mode 100644
index 0000000..effd4ee
--- /dev/null
+++ b/net/sched/sch_mqprio.c
@@ -0,0 +1,416 @@
+/*
+ * net/sched/sch_mqprio.c
+ *
+ * Copyright (c) 2010 John Fastabend <john.r.fastabend@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <net/sch_generic.h>
+
+struct mqprio_sched {
+ struct Qdisc **qdiscs;
+ int hw_owned;
+};
+
+static void mqprio_destroy(struct Qdisc *sch)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ struct mqprio_sched *priv = qdisc_priv(sch);
+ unsigned int ntx;
+
+ if (!priv->qdiscs)
+ return;
+
+ for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++)
+ qdisc_destroy(priv->qdiscs[ntx]);
+
+ if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc)
+ dev->netdev_ops->ndo_setup_tc(dev, 0);
+ else
+ netdev_set_num_tc(dev, 0);
+
+ kfree(priv->qdiscs);
+}
+
+static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
+{
+ int i, j;
+
+ /* Verify num_tc is not out of max range */
+ if (qopt->num_tc > TC_MAX_QUEUE)
+ return -EINVAL;
+
+ /* Verify priority mapping uses valid tcs */
+ for (i = 0; i < TC_BITMASK + 1; i++) {
+ if (qopt->prio_tc_map[i] >= qopt->num_tc)
+ return -EINVAL;
+ }
+
+ /* net_device does not support requested operation */
+ if (qopt->hw && !dev->netdev_ops->ndo_setup_tc)
+ return -EINVAL;
+
+ /* If hw owned, qcount and qoffset are taken from the LLD, so
+ * there is no reason to verify them here
+ */
+ if (qopt->hw)
+ return 0;
+
+ for (i = 0; i < qopt->num_tc; i++) {
+ unsigned int last = qopt->offset[i] + qopt->count[i];
+
+ /* Verify the queue count is in tx range; a range ending at
+ * real_num_tx_queues indicates the last queue is in use.
+ */
+ if (qopt->offset[i] >= dev->real_num_tx_queues ||
+ !qopt->count[i] ||
+ last > dev->real_num_tx_queues)
+ return -EINVAL;
+
+ /* Verify that the offset and counts do not overlap */
+ for (j = i + 1; j < qopt->num_tc; j++) {
+ if (last > qopt->offset[j])
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ struct mqprio_sched *priv = qdisc_priv(sch);
+ struct netdev_queue *dev_queue;
+ struct Qdisc *qdisc;
+ int i, err = -EOPNOTSUPP;
+ struct tc_mqprio_qopt *qopt = NULL;
+
+ BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE);
+ BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK);
+
+ if (sch->parent != TC_H_ROOT)
+ return -EOPNOTSUPP;
+
+ if (!netif_is_multiqueue(dev))
+ return -EOPNOTSUPP;
+
+ if (nla_len(opt) < sizeof(*qopt))
+ return -EINVAL;
+
+ qopt = nla_data(opt);
+ if (mqprio_parse_opt(dev, qopt))
+ return -EINVAL;
+
+ /* pre-allocate qdisc, attachment can't fail */
+ priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
+ GFP_KERNEL);
+ if (priv->qdiscs == NULL) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ dev_queue = netdev_get_tx_queue(dev, i);
+ qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops,
+ TC_H_MAKE(TC_H_MAJ(sch->handle),
+ TC_H_MIN(i + 1)));
+ if (qdisc == NULL) {
+ err = -ENOMEM;
+ goto err;
+ }
+ priv->qdiscs[i] = qdisc;
+ }
+
+ /* If the mqprio options indicate that hardware should own
+ * the queue mapping then run ndo_setup_tc otherwise use the
+ * supplied and verified mapping
+ */
+ if (qopt->hw) {
+ priv->hw_owned = 1;
+ err = dev->netdev_ops->ndo_setup_tc(dev, qopt->num_tc);
+ if (err)
+ goto err;
+ } else {
+ netdev_set_num_tc(dev, qopt->num_tc);
+ for (i = 0; i < qopt->num_tc; i++)
+ netdev_set_tc_queue(dev, i,
+ qopt->count[i], qopt->offset[i]);
+ }
+
+ /* Always use supplied priority mappings */
+ for (i = 0; i < TC_BITMASK + 1; i++)
+ netdev_set_prio_tc_map(dev, i, qopt->prio_tc_map[i]);
+
+ sch->flags |= TCQ_F_MQROOT;
+ return 0;
+
+err:
+ mqprio_destroy(sch);
+ return err;
+}
+
+static void mqprio_attach(struct Qdisc *sch)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ struct mqprio_sched *priv = qdisc_priv(sch);
+ struct Qdisc *qdisc;
+ unsigned int ntx;
+
+ /* Attach underlying qdisc */
+ for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
+ qdisc = priv->qdiscs[ntx];
+ qdisc = dev_graft_qdisc(qdisc->dev_queue, qdisc);
+ if (qdisc)
+ qdisc_destroy(qdisc);
+ }
+ kfree(priv->qdiscs);
+ priv->qdiscs = NULL;
+}
+
+static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch,
+ unsigned long cl)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ unsigned long ntx = cl - 1 - netdev_get_num_tc(dev);
+
+ if (ntx >= dev->num_tx_queues)
+ return NULL;
+ return netdev_get_tx_queue(dev, ntx);
+}
+
+static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
+ struct Qdisc **old)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
+
+ if (!dev_queue)
+ return -EINVAL;
+
+ if (dev->flags & IFF_UP)
+ dev_deactivate(dev);
+
+ *old = dev_graft_qdisc(dev_queue, new);
+
+ if (dev->flags & IFF_UP)
+ dev_activate(dev);
+
+ return 0;
+}
+
+static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ struct mqprio_sched *priv = qdisc_priv(sch);
+ unsigned char *b = skb_tail_pointer(skb);
+ struct tc_mqprio_qopt opt = { 0 };
+ struct Qdisc *qdisc;
+ unsigned int i;
+
+ sch->q.qlen = 0;
+ memset(&sch->bstats, 0, sizeof(sch->bstats));
+ memset(&sch->qstats, 0, sizeof(sch->qstats));
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ qdisc = netdev_get_tx_queue(dev, i)->qdisc;
+ spin_lock_bh(qdisc_lock(qdisc));
+ sch->q.qlen += qdisc->q.qlen;
+ sch->bstats.bytes += qdisc->bstats.bytes;
+ sch->bstats.packets += qdisc->bstats.packets;
+ sch->qstats.qlen += qdisc->qstats.qlen;
+ sch->qstats.backlog += qdisc->qstats.backlog;
+ sch->qstats.drops += qdisc->qstats.drops;
+ sch->qstats.requeues += qdisc->qstats.requeues;
+ sch->qstats.overlimits += qdisc->qstats.overlimits;
+ spin_unlock_bh(qdisc_lock(qdisc));
+ }
+
+ opt.num_tc = netdev_get_num_tc(dev);
+ memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
+ opt.hw = priv->hw_owned;
+
+ for (i = 0; i < netdev_get_num_tc(dev); i++) {
+ opt.count[i] = dev->tc_to_txq[i].count;
+ opt.offset[i] = dev->tc_to_txq[i].offset;
+ }
+
+ NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+
+ return skb->len;
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static struct Qdisc *mqprio_leaf(struct Qdisc *sch, unsigned long cl)
+{
+ struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
+
+ if (!dev_queue)
+ return NULL;
+
+ return dev_queue->qdisc_sleeping;
+}
+
+static unsigned long mqprio_get(struct Qdisc *sch, u32 classid)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ unsigned int ntx = TC_H_MIN(classid);
+
+ if (ntx > dev->num_tx_queues + netdev_get_num_tc(dev))
+ return 0;
+ return ntx;
+}
+
+static void mqprio_put(struct Qdisc *sch, unsigned long cl)
+{
+}
+
+static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl,
+ struct sk_buff *skb, struct tcmsg *tcm)
+{
+ struct net_device *dev = qdisc_dev(sch);
+
+ if (cl <= netdev_get_num_tc(dev)) {
+ tcm->tcm_parent = TC_H_ROOT;
+ tcm->tcm_info = 0;
+ } else {
+ int i;
+ struct netdev_queue *dev_queue;
+
+ dev_queue = mqprio_queue_get(sch, cl);
+ tcm->tcm_parent = 0;
+ for (i = 0; i < netdev_get_num_tc(dev); i++) {
+ struct netdev_tc_txq tc = dev->tc_to_txq[i];
+ int q_idx = cl - netdev_get_num_tc(dev);
+
+ if (q_idx > tc.offset &&
+ q_idx <= tc.offset + tc.count) {
+ tcm->tcm_parent =
+ TC_H_MAKE(TC_H_MAJ(sch->handle),
+ TC_H_MIN(i + 1));
+ break;
+ }
+ }
+ tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
+ }
+ tcm->tcm_handle |= TC_H_MIN(cl);
+ return 0;
+}
+
+static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+ struct gnet_dump *d)
+{
+ struct net_device *dev = qdisc_dev(sch);
+
+ if (cl <= netdev_get_num_tc(dev)) {
+ int i;
+ struct Qdisc *qdisc;
+ struct gnet_stats_queue qstats = {0};
+ struct gnet_stats_basic_packed bstats = {0};
+ struct netdev_tc_txq tc = dev->tc_to_txq[cl - 1];
+
+ /* Drop the lock here; it will be reacquired before touching
+ * statistics. This is required because the d->lock we
+ * hold here is the lock on dev_queue->qdisc_sleeping,
+ * which is also acquired below.
+ */
+ spin_unlock_bh(d->lock);
+
+ for (i = tc.offset; i < tc.offset + tc.count; i++) {
+ qdisc = netdev_get_tx_queue(dev, i)->qdisc;
+ spin_lock_bh(qdisc_lock(qdisc));
+ bstats.bytes += qdisc->bstats.bytes;
+ bstats.packets += qdisc->bstats.packets;
+ qstats.qlen += qdisc->qstats.qlen;
+ qstats.backlog += qdisc->qstats.backlog;
+ qstats.drops += qdisc->qstats.drops;
+ qstats.requeues += qdisc->qstats.requeues;
+ qstats.overlimits += qdisc->qstats.overlimits;
+ spin_unlock_bh(qdisc_lock(qdisc));
+ }
+ /* Reclaim root sleeping lock before completing stats */
+ spin_lock_bh(d->lock);
+ if (gnet_stats_copy_basic(d, &bstats) < 0 ||
+ gnet_stats_copy_queue(d, &qstats) < 0)
+ return -1;
+ } else {
+ struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
+
+ sch = dev_queue->qdisc_sleeping;
+ sch->qstats.qlen = sch->q.qlen;
+ if (gnet_stats_copy_basic(d, &sch->bstats) < 0 ||
+ gnet_stats_copy_queue(d, &sch->qstats) < 0)
+ return -1;
+ }
+ return 0;
+}
+
+static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ unsigned long ntx;
+
+ if (arg->stop)
+ return;
+
+ /* Walk hierarchy with a virtual class per tc */
+ arg->count = arg->skip;
+ for (ntx = arg->skip;
+ ntx < dev->num_tx_queues + netdev_get_num_tc(dev);
+ ntx++) {
+ if (arg->fn(sch, ntx + 1, arg) < 0) {
+ arg->stop = 1;
+ break;
+ }
+ arg->count++;
+ }
+}
+
+static const struct Qdisc_class_ops mqprio_class_ops = {
+ .graft = mqprio_graft,
+ .leaf = mqprio_leaf,
+ .get = mqprio_get,
+ .put = mqprio_put,
+ .walk = mqprio_walk,
+ .dump = mqprio_dump_class,
+ .dump_stats = mqprio_dump_class_stats,
+};
+
+struct Qdisc_ops mqprio_qdisc_ops __read_mostly = {
+ .cl_ops = &mqprio_class_ops,
+ .id = "mqprio",
+ .priv_size = sizeof(struct mqprio_sched),
+ .init = mqprio_init,
+ .destroy = mqprio_destroy,
+ .attach = mqprio_attach,
+ .dump = mqprio_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __init mqprio_module_init(void)
+{
+ return register_qdisc(&mqprio_qdisc_ops);
+}
+
+static void __exit mqprio_module_exit(void)
+{
+ unregister_qdisc(&mqprio_qdisc_ops);
+}
+
+module_init(mqprio_module_init);
+module_exit(mqprio_module_exit);
+
+MODULE_LICENSE("GPL");
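
To see what mqprio_parse_opt() accepts, here is a sketch that fills a userspace mirror of struct tc_mqprio_qopt with a valid software-only configuration: 3 traffic classes over 12 contiguous, non-overlapping tx queues. It assumes the device has real_num_tx_queues >= 12; the struct layout mirrors the uapi definition (TC_QOPT_BITMASK/TC_QOPT_MAX_QUEUE both give 16 entries) and the values are illustrative only:

#include <stdio.h>
#include <string.h>

struct mqprio_opt {		/* userspace mirror, not the uapi header */
	unsigned char num_tc;
	unsigned char prio_tc_map[16];
	unsigned char hw;
	unsigned short count[16];
	unsigned short offset[16];
};

int main(void)
{
	struct mqprio_opt opt;
	int i;

	memset(&opt, 0, sizeof(opt));
	opt.num_tc = 3;			/* <= TC_MAX_QUEUE */
	/* priorities 0-1 -> tc0, 2-3 -> tc1, the rest -> tc2 */
	opt.prio_tc_map[2] = opt.prio_tc_map[3] = 1;
	memset(&opt.prio_tc_map[4], 2, 12);
	/* queue ranges 4@0, 4@4, 4@8: each offset at the previous end */
	for (i = 0; i < 3; i++) {
		opt.count[i] = 4;
		opt.offset[i] = 4 * i;
		printf("tc%d: %u@%u\n", i, (unsigned)opt.count[i],
		       (unsigned)opt.offset[i]);
	}
	return 0;
}
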
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 436a2e7..edc1950 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -156,7 +156,7 @@
unsigned int len;
struct Qdisc *qdisc;
- for (band = q->bands-1; band >= 0; band--) {
+ for (band = q->bands - 1; band >= 0; band--) {
qdisc = q->queues[band];
if (qdisc->ops->drop) {
len = qdisc->ops->drop(qdisc);
@@ -265,7 +265,7 @@
for (i = 0; i < q->max_bands; i++)
q->queues[i] = &noop_qdisc;
- err = multiq_tune(sch,opt);
+ err = multiq_tune(sch, opt);
if (err)
kfree(q->queues);
@@ -346,7 +346,7 @@
struct multiq_sched_data *q = qdisc_priv(sch);
tcm->tcm_handle |= TC_H_MIN(cl);
- tcm->tcm_info = q->queues[cl-1]->handle;
+ tcm->tcm_info = q->queues[cl - 1]->handle;
return 0;
}
@@ -378,7 +378,7 @@
arg->count++;
continue;
}
- if (arg->fn(sch, band+1, arg) < 0) {
+ if (arg->fn(sch, band + 1, arg) < 0) {
arg->stop = 1;
break;
}
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 6a3006b..64f0d32 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -211,8 +211,8 @@
}
cb = netem_skb_cb(skb);
- if (q->gap == 0 || /* not doing reordering */
- q->counter < q->gap || /* inside last reordering gap */
+ if (q->gap == 0 || /* not doing reordering */
+ q->counter < q->gap || /* inside last reordering gap */
q->reorder < get_crandom(&q->reorder_cor)) {
psched_time_t now;
psched_tdiff_t delay;
@@ -248,7 +248,7 @@
return ret;
}
-static unsigned int netem_drop(struct Qdisc* sch)
+static unsigned int netem_drop(struct Qdisc *sch)
{
struct netem_sched_data *q = qdisc_priv(sch);
unsigned int len = 0;
@@ -265,7 +265,7 @@
struct netem_sched_data *q = qdisc_priv(sch);
struct sk_buff *skb;
- if (sch->flags & TCQ_F_THROTTLED)
+ if (qdisc_is_throttled(sch))
return NULL;
skb = q->qdisc->ops->peek(q->qdisc);
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index fbd710d..2a318f2 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -22,8 +22,7 @@
#include <net/pkt_sched.h>
-struct prio_sched_data
-{
+struct prio_sched_data {
int bands;
struct tcf_proto *filter_list;
u8 prio2band[TC_PRIO_MAX+1];
@@ -54,7 +53,7 @@
if (!q->filter_list || err < 0) {
if (TC_H_MAJ(band))
band = 0;
- return q->queues[q->prio2band[band&TC_PRIO_MAX]];
+ return q->queues[q->prio2band[band & TC_PRIO_MAX]];
}
band = res.classid;
}
@@ -106,7 +105,7 @@
return NULL;
}
-static struct sk_buff *prio_dequeue(struct Qdisc* sch)
+static struct sk_buff *prio_dequeue(struct Qdisc *sch)
{
struct prio_sched_data *q = qdisc_priv(sch);
int prio;
@@ -124,7 +123,7 @@
}
-static unsigned int prio_drop(struct Qdisc* sch)
+static unsigned int prio_drop(struct Qdisc *sch)
{
struct prio_sched_data *q = qdisc_priv(sch);
int prio;
@@ -143,24 +142,24 @@
static void
-prio_reset(struct Qdisc* sch)
+prio_reset(struct Qdisc *sch)
{
int prio;
struct prio_sched_data *q = qdisc_priv(sch);
- for (prio=0; prio<q->bands; prio++)
+ for (prio = 0; prio < q->bands; prio++)
qdisc_reset(q->queues[prio]);
sch->q.qlen = 0;
}
static void
-prio_destroy(struct Qdisc* sch)
+prio_destroy(struct Qdisc *sch)
{
int prio;
struct prio_sched_data *q = qdisc_priv(sch);
tcf_destroy_chain(&q->filter_list);
- for (prio=0; prio<q->bands; prio++)
+ for (prio = 0; prio < q->bands; prio++)
qdisc_destroy(q->queues[prio]);
}
@@ -177,7 +176,7 @@
if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2)
return -EINVAL;
- for (i=0; i<=TC_PRIO_MAX; i++) {
+ for (i = 0; i <= TC_PRIO_MAX; i++) {
if (qopt->priomap[i] >= qopt->bands)
return -EINVAL;
}
@@ -186,7 +185,7 @@
q->bands = qopt->bands;
memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
- for (i=q->bands; i<TCQ_PRIO_BANDS; i++) {
+ for (i = q->bands; i < TCQ_PRIO_BANDS; i++) {
struct Qdisc *child = q->queues[i];
q->queues[i] = &noop_qdisc;
if (child != &noop_qdisc) {
@@ -196,9 +195,10 @@
}
sch_tree_unlock(sch);
- for (i=0; i<q->bands; i++) {
+ for (i = 0; i < q->bands; i++) {
if (q->queues[i] == &noop_qdisc) {
struct Qdisc *child, *old;
+
child = qdisc_create_dflt(sch->dev_queue,
&pfifo_qdisc_ops,
TC_H_MAKE(sch->handle, i + 1));
@@ -224,7 +224,7 @@
struct prio_sched_data *q = qdisc_priv(sch);
int i;
- for (i=0; i<TCQ_PRIO_BANDS; i++)
+ for (i = 0; i < TCQ_PRIO_BANDS; i++)
q->queues[i] = &noop_qdisc;
if (opt == NULL) {
@@ -232,7 +232,7 @@
} else {
int err;
- if ((err= prio_tune(sch, opt)) != 0)
+ if ((err = prio_tune(sch, opt)) != 0)
return err;
}
return 0;
@@ -245,7 +245,7 @@
struct tc_prio_qopt opt;
opt.bands = q->bands;
- memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1);
+ memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX + 1);
NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
@@ -342,7 +342,7 @@
arg->count++;
continue;
}
- if (arg->fn(sch, prio+1, arg) < 0) {
+ if (arg->fn(sch, prio + 1, arg) < 0) {
arg->stop = 1;
break;
}
@@ -350,7 +350,7 @@
}
}
-static struct tcf_proto ** prio_find_tcf(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto **prio_find_tcf(struct Qdisc *sch, unsigned long cl)
{
struct prio_sched_data *q = qdisc_priv(sch);
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 9f98dbd..6649463 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -36,8 +36,7 @@
if RED works correctly.
*/
-struct red_sched_data
-{
+struct red_sched_data {
u32 limit; /* HARD maximal queue length */
unsigned char flags;
struct red_parms parms;
@@ -55,7 +54,7 @@
return q->flags & TC_RED_HARDDROP;
}
-static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct red_sched_data *q = qdisc_priv(sch);
struct Qdisc *child = q->qdisc;
@@ -67,29 +66,29 @@
red_end_of_idle_period(&q->parms);
switch (red_action(&q->parms, q->parms.qavg)) {
- case RED_DONT_MARK:
- break;
+ case RED_DONT_MARK:
+ break;
- case RED_PROB_MARK:
- sch->qstats.overlimits++;
- if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
- q->stats.prob_drop++;
- goto congestion_drop;
- }
+ case RED_PROB_MARK:
+ sch->qstats.overlimits++;
+ if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
+ q->stats.prob_drop++;
+ goto congestion_drop;
+ }
- q->stats.prob_mark++;
- break;
+ q->stats.prob_mark++;
+ break;
- case RED_HARD_MARK:
- sch->qstats.overlimits++;
- if (red_use_harddrop(q) || !red_use_ecn(q) ||
- !INET_ECN_set_ce(skb)) {
- q->stats.forced_drop++;
- goto congestion_drop;
- }
+ case RED_HARD_MARK:
+ sch->qstats.overlimits++;
+ if (red_use_harddrop(q) || !red_use_ecn(q) ||
+ !INET_ECN_set_ce(skb)) {
+ q->stats.forced_drop++;
+ goto congestion_drop;
+ }
- q->stats.forced_mark++;
- break;
+ q->stats.forced_mark++;
+ break;
}
ret = qdisc_enqueue(skb, child);
@@ -106,7 +105,7 @@
return NET_XMIT_CN;
}
-static struct sk_buff * red_dequeue(struct Qdisc* sch)
+static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
struct sk_buff *skb;
struct red_sched_data *q = qdisc_priv(sch);
@@ -123,7 +122,7 @@
return skb;
}
-static struct sk_buff * red_peek(struct Qdisc* sch)
+static struct sk_buff *red_peek(struct Qdisc *sch)
{
struct red_sched_data *q = qdisc_priv(sch);
struct Qdisc *child = q->qdisc;
@@ -131,7 +130,7 @@
return child->ops->peek(child);
}
-static unsigned int red_drop(struct Qdisc* sch)
+static unsigned int red_drop(struct Qdisc *sch)
{
struct red_sched_data *q = qdisc_priv(sch);
struct Qdisc *child = q->qdisc;
@@ -150,7 +149,7 @@
return 0;
}
-static void red_reset(struct Qdisc* sch)
+static void red_reset(struct Qdisc *sch)
{
struct red_sched_data *q = qdisc_priv(sch);
@@ -217,7 +216,7 @@
return 0;
}
-static int red_init(struct Qdisc* sch, struct nlattr *opt)
+static int red_init(struct Qdisc *sch, struct nlattr *opt)
{
struct red_sched_data *q = qdisc_priv(sch);
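
Annotation: the re-indented switch in red_enqueue implements RED's three verdicts: below the minimum threshold the packet passes, between the thresholds it is probabilistically marked, above the maximum it is always marked; with ECN a "mark" sets CE on the packet and it is still queued, otherwise (or when harddrop applies to a hard mark) it becomes a congestion drop. A hedged userspace sketch of that decision, with helper names invented for illustration:

/* Sketch (assumption, not from the patch): the mark-or-drop logic the
 * switch above encodes, reduced to a pure decision function. */
#include <stdbool.h>
#include <stdio.h>

enum red_action { RED_DONT_MARK, RED_PROB_MARK, RED_HARD_MARK };

/* returns true if the packet may be enqueued, false if it must drop */
static bool red_admit(enum red_action act, bool use_ecn,
		      bool use_harddrop, bool ecn_mark_ok)
{
	switch (act) {
	case RED_DONT_MARK:
		return true;
	case RED_PROB_MARK:
		return use_ecn && ecn_mark_ok;	/* else prob_drop */
	case RED_HARD_MARK:
		return !use_harddrop && use_ecn && ecn_mark_ok;
	}
	return false;
}

int main(void)
{
	printf("hard mark, ecn on, no harddrop -> %s\n",
	       red_admit(RED_HARD_MARK, true, false, true) ?
	       "queue (CE marked)" : "drop");
	return 0;
}
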
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index edea8ce..4cff442 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -21,6 +21,7 @@
#include <linux/skbuff.h>
#include <linux/jhash.h>
#include <linux/slab.h>
+#include <linux/vmalloc.h>
#include <net/ip.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
@@ -76,7 +77,8 @@
#define SFQ_DEPTH 128 /* max number of packets per flow */
#define SFQ_SLOTS 128 /* max number of flows */
#define SFQ_EMPTY_SLOT 255
-#define SFQ_HASH_DIVISOR 1024
+#define SFQ_DEFAULT_HASH_DIVISOR 1024
+
/* We use 16 bits to store allot, and want to handle packets up to 64K
* Scale allot by 8 (1<<3) so that no overflow occurs.
*/
@@ -92,8 +94,7 @@
* while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1]
* are 'pointers' to dep[] array
*/
-struct sfq_head
-{
+struct sfq_head {
sfq_index next;
sfq_index prev;
};
@@ -108,13 +109,12 @@
short allot; /* credit for this slot */
};
-struct sfq_sched_data
-{
+struct sfq_sched_data {
/* Parameters */
int perturb_period;
- unsigned quantum; /* Allotment per round: MUST BE >= MTU */
+ unsigned int quantum; /* Allotment per round: MUST BE >= MTU */
int limit;
-
+ unsigned int divisor; /* number of slots in hash table */
/* Variables */
struct tcf_proto *filter_list;
struct timer_list perturb_timer;
@@ -122,7 +122,7 @@
sfq_index cur_depth; /* depth of longest slot */
unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
struct sfq_slot *tail; /* current slot in round */
- sfq_index ht[SFQ_HASH_DIVISOR]; /* Hash table */
+ sfq_index *ht; /* Hash table (divisor slots) */
struct sfq_slot slots[SFQ_SLOTS];
struct sfq_head dep[SFQ_DEPTH]; /* Linked list of slots, indexed by depth */
};
@@ -137,12 +137,12 @@
return &q->dep[val - SFQ_SLOTS];
}
-static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
+static unsigned int sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
{
- return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1);
+ return jhash_2words(h, h1, q->perturbation) & (q->divisor - 1);
}
-static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
+static unsigned int sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
{
u32 h, h2;
@@ -157,13 +157,13 @@
iph = ip_hdr(skb);
h = (__force u32)iph->daddr;
h2 = (__force u32)iph->saddr ^ iph->protocol;
- if (iph->frag_off & htons(IP_MF|IP_OFFSET))
+ if (iph->frag_off & htons(IP_MF | IP_OFFSET))
break;
poff = proto_ports_offset(iph->protocol);
if (poff >= 0 &&
pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) {
iph = ip_hdr(skb);
- h2 ^= *(u32*)((void *)iph + iph->ihl * 4 + poff);
+ h2 ^= *(u32 *)((void *)iph + iph->ihl * 4 + poff);
}
break;
}
@@ -181,7 +181,7 @@
if (poff >= 0 &&
pskb_network_may_pull(skb, sizeof(*iph) + 4 + poff)) {
iph = ipv6_hdr(skb);
- h2 ^= *(u32*)((void *)iph + sizeof(*iph) + poff);
+ h2 ^= *(u32 *)((void *)iph + sizeof(*iph) + poff);
}
break;
}
@@ -203,7 +203,7 @@
if (TC_H_MAJ(skb->priority) == sch->handle &&
TC_H_MIN(skb->priority) > 0 &&
- TC_H_MIN(skb->priority) <= SFQ_HASH_DIVISOR)
+ TC_H_MIN(skb->priority) <= q->divisor)
return TC_H_MIN(skb->priority);
if (!q->filter_list)
@@ -221,7 +221,7 @@
return 0;
}
#endif
- if (TC_H_MIN(res.classid) <= SFQ_HASH_DIVISOR)
+ if (TC_H_MIN(res.classid) <= q->divisor)
return TC_H_MIN(res.classid);
}
return 0;
@@ -497,7 +497,11 @@
q->perturb_period = ctl->perturb_period * HZ;
if (ctl->limit)
q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);
-
+ if (ctl->divisor) {
+ if (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536)
+ return -EINVAL;
+ q->divisor = ctl->divisor;
+ }
qlen = sch->q.qlen;
while (sch->q.qlen > q->limit)
sfq_drop(sch);
@@ -515,15 +519,13 @@
static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
{
struct sfq_sched_data *q = qdisc_priv(sch);
+ size_t sz;
int i;
q->perturb_timer.function = sfq_perturbation;
q->perturb_timer.data = (unsigned long)sch;
init_timer_deferrable(&q->perturb_timer);
- for (i = 0; i < SFQ_HASH_DIVISOR; i++)
- q->ht[i] = SFQ_EMPTY_SLOT;
-
for (i = 0; i < SFQ_DEPTH; i++) {
q->dep[i].next = i + SFQ_SLOTS;
q->dep[i].prev = i + SFQ_SLOTS;
@@ -532,6 +534,7 @@
q->limit = SFQ_DEPTH - 1;
q->cur_depth = 0;
q->tail = NULL;
+ q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
if (opt == NULL) {
q->quantum = psched_mtu(qdisc_dev(sch));
q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
@@ -543,10 +546,23 @@
return err;
}
+ sz = sizeof(q->ht[0]) * q->divisor;
+ q->ht = kmalloc(sz, GFP_KERNEL);
+ if (!q->ht && sz > PAGE_SIZE)
+ q->ht = vmalloc(sz);
+ if (!q->ht)
+ return -ENOMEM;
+ for (i = 0; i < q->divisor; i++)
+ q->ht[i] = SFQ_EMPTY_SLOT;
+
for (i = 0; i < SFQ_SLOTS; i++) {
slot_queue_init(&q->slots[i]);
sfq_link(q, i);
}
+ if (q->limit >= 1)
+ sch->flags |= TCQ_F_CAN_BYPASS;
+ else
+ sch->flags &= ~TCQ_F_CAN_BYPASS;
return 0;
}
@@ -557,6 +573,10 @@
tcf_destroy_chain(&q->filter_list);
q->perturb_period = 0;
del_timer_sync(&q->perturb_timer);
+ if (is_vmalloc_addr(q->ht))
+ vfree(q->ht);
+ else
+ kfree(q->ht);
}
static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -569,7 +589,7 @@
opt.perturb_period = q->perturb_period / HZ;
opt.limit = q->limit;
- opt.divisor = SFQ_HASH_DIVISOR;
+ opt.divisor = q->divisor;
opt.flows = q->limit;
NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
@@ -594,6 +614,8 @@
static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
u32 classid)
{
+ /* we cannot bypass queue discipline anymore */
+ sch->flags &= ~TCQ_F_CAN_BYPASS;
return 0;
}
@@ -647,7 +669,7 @@
if (arg->stop)
return;
- for (i = 0; i < SFQ_HASH_DIVISOR; i++) {
+ for (i = 0; i < q->divisor; i++) {
if (q->ht[i] == SFQ_EMPTY_SLOT ||
arg->count < arg->skip) {
arg->count++;
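
Annotation: the SFQ changes make the hash divisor configurable. sfq_change rejects any divisor that is not a power of two or exceeds 65536, because sfq_fold_hash masks with (q->divisor - 1); sfq_init then allocates the hash table at runtime, trying kmalloc() first and falling back to vmalloc() for large tables, and sfq_destroy frees with whichever allocator matched. A minimal kernel-style sketch of that allocate/free pairing follows; it is the pattern the kernel later wrapped as kvmalloc()/kvfree(), the helper names here are invented, and this is pseudo-module code rather than a standalone program:

/* Sketch: try physically contiguous kmalloc() first; if that fails for
 * a multi-page request, fall back to virtually contiguous vmalloc().
 * The free side asks is_vmalloc_addr() which allocator to undo. */
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>

static void *table_alloc(size_t sz)
{
	void *p = kmalloc(sz, GFP_KERNEL);

	if (!p && sz > PAGE_SIZE)	/* contiguous pages unavailable */
		p = vmalloc(sz);	/* virtually contiguous fallback */
	return p;
}

static void table_free(void *p)
{
	if (is_vmalloc_addr(p))
		vfree(p);
	else
		kfree(p);
}
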
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index e931658..1dcfb52 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -97,8 +97,7 @@
changed the limit is not effective anymore.
*/
-struct tbf_sched_data
-{
+struct tbf_sched_data {
/* Parameters */
u32 limit; /* Maximal length of backlog: bytes */
u32 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */
@@ -115,10 +114,10 @@
struct qdisc_watchdog watchdog; /* Watchdog timer */
};
-#define L2T(q,L) qdisc_l2t((q)->R_tab,L)
-#define L2T_P(q,L) qdisc_l2t((q)->P_tab,L)
+#define L2T(q, L) qdisc_l2t((q)->R_tab, L)
+#define L2T_P(q, L) qdisc_l2t((q)->P_tab, L)
-static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct tbf_sched_data *q = qdisc_priv(sch);
int ret;
@@ -137,7 +136,7 @@
return NET_XMIT_SUCCESS;
}
-static unsigned int tbf_drop(struct Qdisc* sch)
+static unsigned int tbf_drop(struct Qdisc *sch)
{
struct tbf_sched_data *q = qdisc_priv(sch);
unsigned int len = 0;
@@ -149,7 +148,7 @@
return len;
}
-static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
+static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
{
struct tbf_sched_data *q = qdisc_priv(sch);
struct sk_buff *skb;
@@ -185,7 +184,7 @@
q->tokens = toks;
q->ptokens = ptoks;
sch->q.qlen--;
- sch->flags &= ~TCQ_F_THROTTLED;
+ qdisc_unthrottled(sch);
qdisc_bstats_update(sch, skb);
return skb;
}
@@ -209,7 +208,7 @@
return NULL;
}
-static void tbf_reset(struct Qdisc* sch)
+static void tbf_reset(struct Qdisc *sch)
{
struct tbf_sched_data *q = qdisc_priv(sch);
@@ -227,7 +226,7 @@
[TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
};
-static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
+static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
{
int err;
struct tbf_sched_data *q = qdisc_priv(sch);
@@ -236,7 +235,7 @@
struct qdisc_rate_table *rtab = NULL;
struct qdisc_rate_table *ptab = NULL;
struct Qdisc *child = NULL;
- int max_size,n;
+ int max_size, n;
err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy);
if (err < 0)
@@ -259,15 +258,18 @@
}
for (n = 0; n < 256; n++)
- if (rtab->data[n] > qopt->buffer) break;
- max_size = (n << qopt->rate.cell_log)-1;
+ if (rtab->data[n] > qopt->buffer)
+ break;
+ max_size = (n << qopt->rate.cell_log) - 1;
if (ptab) {
int size;
for (n = 0; n < 256; n++)
- if (ptab->data[n] > qopt->mtu) break;
- size = (n << qopt->peakrate.cell_log)-1;
- if (size < max_size) max_size = size;
+ if (ptab->data[n] > qopt->mtu)
+ break;
+ size = (n << qopt->peakrate.cell_log) - 1;
+ if (size < max_size)
+ max_size = size;
}
if (max_size < 0)
goto done;
@@ -310,7 +312,7 @@
return err;
}
-static int tbf_init(struct Qdisc* sch, struct nlattr *opt)
+static int tbf_init(struct Qdisc *sch, struct nlattr *opt)
{
struct tbf_sched_data *q = qdisc_priv(sch);
@@ -422,8 +424,7 @@
}
}
-static const struct Qdisc_class_ops tbf_class_ops =
-{
+static const struct Qdisc_class_ops tbf_class_ops = {
.graft = tbf_graft,
.leaf = tbf_leaf,
.get = tbf_get,
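
Annotation: in tbf_change, the newly broken-out loops derive max_size by scanning the 256-entry rate table for the largest cell whose transmission time still fits within the configured buffer (and, when a peak-rate table exists, within the mtu budget). A small userspace sketch of that scan, using a toy rate table as an assumption:

/* Illustrative sketch (assumption): data[n] holds the time needed to
 * send a packet of roughly ((n + 1) << cell_log) bytes at the
 * configured rate; the scan finds the largest cell whose send time
 * still fits in the given budget. */
#include <stdio.h>
#include <stdint.h>

static int tbf_max_size(const uint32_t data[256], uint32_t budget,
			int cell_log)
{
	int n;

	for (n = 0; n < 256; n++)
		if (data[n] > budget)
			break;
	return (n << cell_log) - 1;	/* -1 when even cell 0 is too big */
}

int main(void)
{
	uint32_t rtab[256];
	int n;

	for (n = 0; n < 256; n++)	/* toy table: 10 time units/cell */
		rtab[n] = (n + 1) * 10;
	printf("max_size = %d bytes\n", tbf_max_size(rtab, 1000, 3));
	return 0;
}
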
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index d84e732..45cd300 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -53,8 +53,7 @@
which will not break load balancing, though native slave
traffic will have the highest priority. */
-struct teql_master
-{
+struct teql_master {
struct Qdisc_ops qops;
struct net_device *dev;
struct Qdisc *slaves;
@@ -65,22 +64,21 @@
unsigned long tx_dropped;
};
-struct teql_sched_data
-{
+struct teql_sched_data {
struct Qdisc *next;
struct teql_master *m;
struct neighbour *ncache;
struct sk_buff_head q;
};
-#define NEXT_SLAVE(q) (((struct teql_sched_data*)qdisc_priv(q))->next)
+#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)
-#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT)
+#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
/* "teql*" qdisc routines */
static int
-teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct net_device *dev = qdisc_dev(sch);
struct teql_sched_data *q = qdisc_priv(sch);
@@ -96,7 +94,7 @@
}
static struct sk_buff *
-teql_dequeue(struct Qdisc* sch)
+teql_dequeue(struct Qdisc *sch)
{
struct teql_sched_data *dat = qdisc_priv(sch);
struct netdev_queue *dat_queue;
@@ -118,13 +116,13 @@
}
static struct sk_buff *
-teql_peek(struct Qdisc* sch)
+teql_peek(struct Qdisc *sch)
{
/* teql is meant to be used as root qdisc */
return NULL;
}
-static __inline__ void
+static inline void
teql_neigh_release(struct neighbour *n)
{
if (n)
@@ -132,7 +130,7 @@
}
static void
-teql_reset(struct Qdisc* sch)
+teql_reset(struct Qdisc *sch)
{
struct teql_sched_data *dat = qdisc_priv(sch);
@@ -142,13 +140,14 @@
}
static void
-teql_destroy(struct Qdisc* sch)
+teql_destroy(struct Qdisc *sch)
{
struct Qdisc *q, *prev;
struct teql_sched_data *dat = qdisc_priv(sch);
struct teql_master *master = dat->m;
- if ((prev = master->slaves) != NULL) {
+ prev = master->slaves;
+ if (prev) {
do {
q = NEXT_SLAVE(prev);
if (q == sch) {
@@ -180,7 +179,7 @@
static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
{
struct net_device *dev = qdisc_dev(sch);
- struct teql_master *m = (struct teql_master*)sch->ops;
+ struct teql_master *m = (struct teql_master *)sch->ops;
struct teql_sched_data *q = qdisc_priv(sch);
if (dev->hard_header_len > m->dev->hard_header_len)
@@ -291,7 +290,8 @@
nores = 0;
busy = 0;
- if ((q = start) == NULL)
+ q = start;
+ if (!q)
goto drop;
do {
@@ -356,10 +356,10 @@
static int teql_master_open(struct net_device *dev)
{
- struct Qdisc * q;
+ struct Qdisc *q;
struct teql_master *m = netdev_priv(dev);
int mtu = 0xFFFE;
- unsigned flags = IFF_NOARP|IFF_MULTICAST;
+ unsigned int flags = IFF_NOARP | IFF_MULTICAST;
if (m->slaves == NULL)
return -EUNATCH;
@@ -427,7 +427,7 @@
do {
if (new_mtu > qdisc_dev(q)->mtu)
return -EINVAL;
- } while ((q=NEXT_SLAVE(q)) != m->slaves);
+ } while ((q = NEXT_SLAVE(q)) != m->slaves);
}
dev->mtu = new_mtu;
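
Annotation: teql_destroy walks the circular, singly linked ring of slave qdiscs (NEXT_SLAVE) to unlink the one being torn down; the patch only untangles the assignment-in-condition. A userspace sketch of the same ring unlink, with structure and names invented for illustration:

/* Sketch (assumption): removing one node from a circular singly
 * linked ring, as teql_destroy does for master->slaves. */
#include <stdio.h>

struct slave {
	struct slave *next;	/* ring: last->next == first */
	int id;
};

/* remove victim from the ring; returns the new head (NULL if empty) */
static struct slave *ring_remove(struct slave *head, struct slave *victim)
{
	struct slave *prev = head;

	do {
		struct slave *q = prev->next;

		if (q == victim) {
			prev->next = q->next;
			if (q == head)
				head = (q == q->next) ? NULL : q->next;
			return head;
		}
		prev = q;
	} while (prev != head);
	return head;			/* victim was not in the ring */
}

int main(void)
{
	struct slave a = { .id = 1 }, b = { .id = 2 }, c = { .id = 3 };
	struct slave *head, *p;

	a.next = &b;
	b.next = &c;
	c.next = &a;
	head = ring_remove(&a, &b);	/* unlink the middle slave */
	p = head;
	do {
		printf("slave %d\n", p->id);
		p = p->next;
	} while (p != head);
	return 0;
}
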
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index dd419d2..d8d98d5 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1475,6 +1475,12 @@
goto out_free;
}
+ if (sk_filter(other, skb) < 0) {
+ /* Toss the packet but do not return any error to the sender */
+ err = len;
+ goto out_free;
+ }
+
unix_state_lock(other);
err = -EPERM;
if (!unix_may_send(sk, other))
@@ -1978,36 +1984,38 @@
mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
- if (mode) {
- unix_state_lock(sk);
- sk->sk_shutdown |= mode;
- other = unix_peer(sk);
- if (other)
- sock_hold(other);
- unix_state_unlock(sk);
- sk->sk_state_change(sk);
+ if (!mode)
+ return 0;
- if (other &&
- (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
+ unix_state_lock(sk);
+ sk->sk_shutdown |= mode;
+ other = unix_peer(sk);
+ if (other)
+ sock_hold(other);
+ unix_state_unlock(sk);
+ sk->sk_state_change(sk);
- int peer_mode = 0;
+ if (other &&
+ (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
- if (mode&RCV_SHUTDOWN)
- peer_mode |= SEND_SHUTDOWN;
- if (mode&SEND_SHUTDOWN)
- peer_mode |= RCV_SHUTDOWN;
- unix_state_lock(other);
- other->sk_shutdown |= peer_mode;
- unix_state_unlock(other);
- other->sk_state_change(other);
- if (peer_mode == SHUTDOWN_MASK)
- sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
- else if (peer_mode & RCV_SHUTDOWN)
- sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
- }
- if (other)
- sock_put(other);
+ int peer_mode = 0;
+
+ if (mode&RCV_SHUTDOWN)
+ peer_mode |= SEND_SHUTDOWN;
+ if (mode&SEND_SHUTDOWN)
+ peer_mode |= RCV_SHUTDOWN;
+ unix_state_lock(other);
+ other->sk_shutdown |= peer_mode;
+ unix_state_unlock(other);
+ other->sk_state_change(other);
+ if (peer_mode == SHUTDOWN_MASK)
+ sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
+ else if (peer_mode & RCV_SHUTDOWN)
+ sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
}
+ if (other)
+ sock_put(other);
+
return 0;
}
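
Annotation: the new sk_filter() hook in unix_dgram_sendmsg runs the receiver's socket filter at send time and, on a negative verdict, discards the datagram while reporting success (err = len) to the sender; as the patch comment says, the drop is deliberately silent. A userspace sketch of observing that behaviour with a classic BPF drop-all filter, assuming a Linux host with this patch applied:

/* Sketch: attach a drop-everything classic BPF program to one end of
 * an AF_UNIX datagram socketpair; the peer's write() still succeeds,
 * but nothing is ever queued for the filtered receiver. */
#include <linux/filter.h>
#include <sys/socket.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int sv[2];
	char buf[16];
	ssize_t n;

	/* BPF_RET 0: accept zero bytes of every packet, i.e. drop all */
	struct sock_filter drop_all[] = { BPF_STMT(BPF_RET | BPF_K, 0) };
	struct sock_fprog prog = { .len = 1, .filter = drop_all };

	if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sv) < 0)
		return 1;
	if (setsockopt(sv[1], SOL_SOCKET, SO_ATTACH_FILTER,
		       &prog, sizeof(prog)) < 0)
		return 1;

	n = write(sv[0], "hello", 5);
	printf("write returned %zd (no error reported)\n", n);

	/* nothing arrives: the receiver's filter ate the datagram */
	n = recv(sv[1], buf, sizeof(buf), MSG_DONTWAIT);
	printf("recv returned %zd (would block: queue is empty)\n", n);
	return 0;
}
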
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c
index 74944a2..788a12c 100644
--- a/net/wanrouter/wanmain.c
+++ b/net/wanrouter/wanmain.c
@@ -59,8 +59,6 @@
#include <asm/uaccess.h> /* copy_to/from_user */
#include <linux/init.h> /* __initfunc et al. */
-#define KMEM_SAFETYZONE 8
-
#define DEV_TO_SLAVE(dev) (*((struct net_device **)netdev_priv(dev)))
/*