IB/qib: Add congestion control agent implementation
Add a congestion control agent in the driver that handles gets and
sets from the congestion control manager in the fabric for the
Performance Scale Messaging (PSM) library.
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index cbe5771..6e19ec8 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -519,6 +519,7 @@
struct qib_devdata *dd;
struct qib_chippport_specific *cpspec; /* chip-specific per-port */
struct kobject pport_kobj;
+ struct kobject pport_cc_kobj;
struct kobject sl2vl_kobj;
struct kobject diagc_kobj;
@@ -638,6 +639,39 @@
struct timer_list led_override_timer;
struct xmit_wait cong_stats;
struct timer_list symerr_clear_timer;
+
+ /* Synchronize access between driver writes and sysfs reads */
+ spinlock_t cc_shadow_lock
+ ____cacheline_aligned_in_smp;
+
+ /* Shadow copy of the congestion control table */
+ struct cc_table_shadow *ccti_entries_shadow;
+
+ /* Shadow copy of the congestion control entries */
+ struct ib_cc_congestion_setting_attr_shadow *congestion_entries_shadow;
+
+ /* List of congestion control table entries */
+ struct ib_cc_table_entry_shadow *ccti_entries;
+
+ /* 16 congestion entries with each entry corresponding to a SL */
+ struct ib_cc_congestion_entry_shadow *congestion_entries;
+
+ /* Total number of congestion control table entries */
+ u16 total_cct_entry;
+
+ /* Bit map identifying service level */
+ u16 cc_sl_control_map;
+
+ /* maximum congestion control table index */
+ u16 ccti_limit;
+
+ /* CA's max number of 64 entry units in the congestion control table */
+ u8 cc_max_table_entries;
+
+ /* Maximum number of congestion control entries that the agent expects
+ * the manager to send.
+ */
+ u8 cc_supported_table_entries;
};
/* Observers. Not to be taken lightly, possibly not to ship. */
@@ -1078,6 +1112,7 @@
extern unsigned long *qib_cpulist;
extern unsigned qib_wc_pat;
+extern unsigned qib_cc_table_size;
int qib_init(struct qib_devdata *, int);
int init_chip_wc_pat(struct qib_devdata *dd, u32);
int qib_enable_wc(struct qib_devdata *dd);
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 306e65e..24ad901 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -41,6 +41,7 @@
#include "qib.h"
#include "qib_common.h"
+#include "qib_mad.h"
/*
* min buffers we want to have per context, after driver
@@ -71,6 +72,9 @@
module_param_named(krcvqs, qib_n_krcv_queues, uint, S_IRUGO);
MODULE_PARM_DESC(krcvqs, "number of kernel receive queues per IB port");
+unsigned qib_cc_table_size;
+module_param_named(cc_table_size, qib_cc_table_size, uint, S_IRUGO);
+MODULE_PARM_DESC(cc_table_size, "Congestion control table entries 0 (CCA disabled - default), min = 128, max = 1984");
/*
* qib_wc_pat parameter:
* 0 is WC via MTRR
@@ -199,6 +203,7 @@
void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
u8 hw_pidx, u8 port)
{
+ int size;
ppd->dd = dd;
ppd->hw_pidx = hw_pidx;
ppd->port = port; /* IB port number, not index */
@@ -212,6 +217,81 @@
ppd->symerr_clear_timer.data = (unsigned long)ppd;
ppd->qib_wq = NULL;
+
+ spin_lock_init(&ppd->cc_shadow_lock);
+
+ if (qib_cc_table_size < IB_CCT_MIN_ENTRIES)
+ goto bail;
+
+ ppd->cc_supported_table_entries = min(max_t(int, qib_cc_table_size,
+ IB_CCT_MIN_ENTRIES), IB_CCT_ENTRIES*IB_CC_TABLE_CAP_DEFAULT);
+
+ ppd->cc_max_table_entries =
+ ppd->cc_supported_table_entries/IB_CCT_ENTRIES;
+
+ size = IB_CC_TABLE_CAP_DEFAULT * sizeof(struct ib_cc_table_entry)
+ * IB_CCT_ENTRIES;
+ ppd->ccti_entries = kzalloc(size, GFP_KERNEL);
+ if (!ppd->ccti_entries) {
+ qib_dev_err(dd,
+ "failed to allocate congestion control table for port %d!\n",
+ port);
+ goto bail;
+ }
+
+ size = IB_CC_CCS_ENTRIES * sizeof(struct ib_cc_congestion_entry);
+ ppd->congestion_entries = kzalloc(size, GFP_KERNEL);
+ if (!ppd->congestion_entries) {
+ qib_dev_err(dd,
+ "failed to allocate congestion setting list for port %d!\n",
+ port);
+ goto bail_1;
+ }
+
+ size = sizeof(struct cc_table_shadow);
+ ppd->ccti_entries_shadow = kzalloc(size, GFP_KERNEL);
+ if (!ppd->ccti_entries_shadow) {
+ qib_dev_err(dd,
+ "failed to allocate shadow ccti list for port %d!\n",
+ port);
+ goto bail_2;
+ }
+
+ size = sizeof(struct ib_cc_congestion_setting_attr);
+ ppd->congestion_entries_shadow = kzalloc(size, GFP_KERNEL);
+ if (!ppd->congestion_entries_shadow) {
+ qib_dev_err(dd,
+ "failed to allocate shadow congestion setting list for port %d!\n",
+ port);
+ goto bail_3;
+ }
+
+ return;
+
+bail_3:
+ kfree(ppd->ccti_entries_shadow);
+ ppd->ccti_entries_shadow = NULL;
+bail_2:
+ kfree(ppd->congestion_entries);
+ ppd->congestion_entries = NULL;
+bail_1:
+ kfree(ppd->ccti_entries);
+ ppd->ccti_entries = NULL;
+bail:
+ /* User is intentionally disabling the congestion control agent */
+ if (!qib_cc_table_size)
+ return;
+
+ if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) {
+ qib_cc_table_size = 0;
+ qib_dev_err(dd,
+ "Congestion Control table size %d less than minimum %d for port %d\n",
+ qib_cc_table_size, IB_CCT_MIN_ENTRIES, port);
+ }
+
+ qib_dev_err(dd, "Congestion Control Agent disabled for port %d\n",
+ port);
+ return;
}
static int init_pioavailregs(struct qib_devdata *dd)
@@ -1164,10 +1244,24 @@
unsigned long flags;
/* users can't do anything more with chip */
- for (pidx = 0; pidx < dd->num_pports; ++pidx)
+ for (pidx = 0; pidx < dd->num_pports; ++pidx) {
if (dd->pport[pidx].statusp)
*dd->pport[pidx].statusp &= ~QIB_STATUS_CHIP_PRESENT;
+ spin_lock(&dd->pport[pidx].cc_shadow_lock);
+
+ kfree(dd->pport[pidx].congestion_entries);
+ dd->pport[pidx].congestion_entries = NULL;
+ kfree(dd->pport[pidx].ccti_entries);
+ dd->pport[pidx].ccti_entries = NULL;
+ kfree(dd->pport[pidx].ccti_entries_shadow);
+ dd->pport[pidx].ccti_entries_shadow = NULL;
+ kfree(dd->pport[pidx].congestion_entries_shadow);
+ dd->pport[pidx].congestion_entries_shadow = NULL;
+
+ spin_unlock(&dd->pport[pidx].cc_shadow_lock);
+ }
+
if (!qib_wc_pat)
qib_disable_wc(dd);
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index 6e20b58..19f1e6c 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -49,6 +49,18 @@
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
}
+static int reply_failure(struct ib_smp *smp)
+{
+ /*
+ * The verbs framework will handle the directed/LID route
+ * packet changes.
+ */
+ smp->method = IB_MGMT_METHOD_GET_RESP;
+ if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+ smp->status |= IB_SMP_DIRECTION;
+ return IB_MAD_RESULT_FAILURE | IB_MAD_RESULT_REPLY;
+}
+
static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len)
{
struct ib_mad_send_buf *send_buf;
@@ -2047,6 +2059,298 @@
return ret;
}
+static int cc_get_classportinfo(struct ib_cc_mad *ccp,
+ struct ib_device *ibdev)
+{
+ struct ib_cc_classportinfo_attr *p =
+ (struct ib_cc_classportinfo_attr *)ccp->mgmt_data;
+
+ memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
+
+ p->base_version = 1;
+ p->class_version = 1;
+ p->cap_mask = 0;
+
+ /*
+ * Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec.
+ */
+ p->resp_time_value = 18;
+
+ return reply((struct ib_smp *) ccp);
+}
+
+static int cc_get_congestion_info(struct ib_cc_mad *ccp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct ib_cc_info_attr *p =
+ (struct ib_cc_info_attr *)ccp->mgmt_data;
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+
+ memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
+
+ p->congestion_info = 0;
+ p->control_table_cap = ppd->cc_max_table_entries;
+
+ return reply((struct ib_smp *) ccp);
+}
+
+static int cc_get_congestion_setting(struct ib_cc_mad *ccp,
+ struct ib_device *ibdev, u8 port)
+{
+ int i;
+ struct ib_cc_congestion_setting_attr *p =
+ (struct ib_cc_congestion_setting_attr *)ccp->mgmt_data;
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ struct ib_cc_congestion_entry_shadow *entries;
+
+ memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
+
+ spin_lock(&ppd->cc_shadow_lock);
+
+ entries = ppd->congestion_entries_shadow->entries;
+ p->port_control = cpu_to_be16(
+ ppd->congestion_entries_shadow->port_control);
+ p->control_map = cpu_to_be16(
+ ppd->congestion_entries_shadow->control_map);
+ for (i = 0; i < IB_CC_CCS_ENTRIES; i++) {
+ p->entries[i].ccti_increase = entries[i].ccti_increase;
+ p->entries[i].ccti_timer = cpu_to_be16(entries[i].ccti_timer);
+ p->entries[i].trigger_threshold = entries[i].trigger_threshold;
+ p->entries[i].ccti_min = entries[i].ccti_min;
+ }
+
+ spin_unlock(&ppd->cc_shadow_lock);
+
+ return reply((struct ib_smp *) ccp);
+}
+
+static int cc_get_congestion_control_table(struct ib_cc_mad *ccp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct ib_cc_table_attr *p =
+ (struct ib_cc_table_attr *)ccp->mgmt_data;
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ u32 cct_block_index = be32_to_cpu(ccp->attr_mod);
+ u32 max_cct_block;
+ u32 cct_entry;
+ struct ib_cc_table_entry_shadow *entries;
+ int i;
+
+ /* Is the table index more than what is supported? */
+ if (cct_block_index > IB_CC_TABLE_CAP_DEFAULT - 1)
+ goto bail;
+
+ memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
+
+ spin_lock(&ppd->cc_shadow_lock);
+
+ max_cct_block =
+ (ppd->ccti_entries_shadow->ccti_last_entry + 1)/IB_CCT_ENTRIES;
+ max_cct_block = max_cct_block ? max_cct_block - 1 : 0;
+
+ if (cct_block_index > max_cct_block) {
+ spin_unlock(&ppd->cc_shadow_lock);
+ goto bail;
+ }
+
+ ccp->attr_mod = cpu_to_be32(cct_block_index);
+
+ cct_entry = IB_CCT_ENTRIES * (cct_block_index + 1);
+
+ cct_entry--;
+
+ p->ccti_limit = cpu_to_be16(cct_entry);
+
+ entries = &ppd->ccti_entries_shadow->
+ entries[IB_CCT_ENTRIES * cct_block_index];
+ cct_entry %= IB_CCT_ENTRIES;
+
+ for (i = 0; i <= cct_entry; i++)
+ p->ccti_entries[i].entry = cpu_to_be16(entries[i].entry);
+
+ spin_unlock(&ppd->cc_shadow_lock);
+
+ return reply((struct ib_smp *) ccp);
+
+bail:
+ return reply_failure((struct ib_smp *) ccp);
+}
+
+static int cc_set_congestion_setting(struct ib_cc_mad *ccp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct ib_cc_congestion_setting_attr *p =
+ (struct ib_cc_congestion_setting_attr *)ccp->mgmt_data;
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ int i;
+
+ ppd->cc_sl_control_map = be16_to_cpu(p->control_map);
+
+ for (i = 0; i < IB_CC_CCS_ENTRIES; i++) {
+ ppd->congestion_entries[i].ccti_increase =
+ p->entries[i].ccti_increase;
+
+ ppd->congestion_entries[i].ccti_timer =
+ be16_to_cpu(p->entries[i].ccti_timer);
+
+ ppd->congestion_entries[i].trigger_threshold =
+ p->entries[i].trigger_threshold;
+
+ ppd->congestion_entries[i].ccti_min =
+ p->entries[i].ccti_min;
+ }
+
+ return reply((struct ib_smp *) ccp);
+}
+
+static int cc_set_congestion_control_table(struct ib_cc_mad *ccp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct ib_cc_table_attr *p =
+ (struct ib_cc_table_attr *)ccp->mgmt_data;
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ u32 cct_block_index = be32_to_cpu(ccp->attr_mod);
+ u32 cct_entry;
+ struct ib_cc_table_entry_shadow *entries;
+ int i;
+
+ /* Is the table index more than what is supported? */
+ if (cct_block_index > IB_CC_TABLE_CAP_DEFAULT - 1)
+ goto bail;
+
+ /* If this packet is the first in the sequence then
+ * zero the total table entry count.
+ */
+ if (be16_to_cpu(p->ccti_limit) < IB_CCT_ENTRIES)
+ ppd->total_cct_entry = 0;
+
+ cct_entry = (be16_to_cpu(p->ccti_limit))%IB_CCT_ENTRIES;
+
+ /* ccti_limit is 0 to 63 */
+ ppd->total_cct_entry += (cct_entry + 1);
+
+ if (ppd->total_cct_entry > ppd->cc_supported_table_entries)
+ goto bail;
+
+ ppd->ccti_limit = be16_to_cpu(p->ccti_limit);
+
+ entries = ppd->ccti_entries + (IB_CCT_ENTRIES * cct_block_index);
+
+ for (i = 0; i <= cct_entry; i++)
+ entries[i].entry = be16_to_cpu(p->ccti_entries[i].entry);
+
+ spin_lock(&ppd->cc_shadow_lock);
+
+ ppd->ccti_entries_shadow->ccti_last_entry = ppd->total_cct_entry - 1;
+ memcpy(ppd->ccti_entries_shadow->entries, ppd->ccti_entries,
+ (ppd->total_cct_entry * sizeof(struct ib_cc_table_entry)));
+
+ ppd->congestion_entries_shadow->port_control = IB_CC_CCS_PC_SL_BASED;
+ ppd->congestion_entries_shadow->control_map = ppd->cc_sl_control_map;
+ memcpy(ppd->congestion_entries_shadow->entries, ppd->congestion_entries,
+ IB_CC_CCS_ENTRIES * sizeof(struct ib_cc_congestion_entry));
+
+ spin_unlock(&ppd->cc_shadow_lock);
+
+ return reply((struct ib_smp *) ccp);
+
+bail:
+ return reply_failure((struct ib_smp *) ccp);
+}
+
+static int check_cc_key(struct qib_ibport *ibp,
+ struct ib_cc_mad *ccp, int mad_flags)
+{
+ return 0;
+}
+
+static int process_cc(struct ib_device *ibdev, int mad_flags,
+ u8 port, struct ib_mad *in_mad,
+ struct ib_mad *out_mad)
+{
+ struct ib_cc_mad *ccp = (struct ib_cc_mad *)out_mad;
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ int ret;
+
+ *out_mad = *in_mad;
+
+ if (ccp->class_version != 2) {
+ ccp->status |= IB_SMP_UNSUP_VERSION;
+ ret = reply((struct ib_smp *)ccp);
+ goto bail;
+ }
+
+ ret = check_cc_key(ibp, ccp, mad_flags);
+ if (ret)
+ goto bail;
+
+ switch (ccp->method) {
+ case IB_MGMT_METHOD_GET:
+ switch (ccp->attr_id) {
+ case IB_CC_ATTR_CLASSPORTINFO:
+ ret = cc_get_classportinfo(ccp, ibdev);
+ goto bail;
+
+ case IB_CC_ATTR_CONGESTION_INFO:
+ ret = cc_get_congestion_info(ccp, ibdev, port);
+ goto bail;
+
+ case IB_CC_ATTR_CA_CONGESTION_SETTING:
+ ret = cc_get_congestion_setting(ccp, ibdev, port);
+ goto bail;
+
+ case IB_CC_ATTR_CONGESTION_CONTROL_TABLE:
+ ret = cc_get_congestion_control_table(ccp, ibdev, port);
+ goto bail;
+
+ /* FALLTHROUGH */
+ default:
+ ccp->status |= IB_SMP_UNSUP_METH_ATTR;
+ ret = reply((struct ib_smp *) ccp);
+ goto bail;
+ }
+
+ case IB_MGMT_METHOD_SET:
+ switch (ccp->attr_id) {
+ case IB_CC_ATTR_CA_CONGESTION_SETTING:
+ ret = cc_set_congestion_setting(ccp, ibdev, port);
+ goto bail;
+
+ case IB_CC_ATTR_CONGESTION_CONTROL_TABLE:
+ ret = cc_set_congestion_control_table(ccp, ibdev, port);
+ goto bail;
+
+ /* FALLTHROUGH */
+ default:
+ ccp->status |= IB_SMP_UNSUP_METH_ATTR;
+ ret = reply((struct ib_smp *) ccp);
+ goto bail;
+ }
+
+ case IB_MGMT_METHOD_GET_RESP:
+ /*
+ * The ib_mad module will call us to process responses
+ * before checking for other consumers.
+ * Just tell the caller to process it normally.
+ */
+ ret = IB_MAD_RESULT_SUCCESS;
+ goto bail;
+
+ case IB_MGMT_METHOD_TRAP:
+ default:
+ ccp->status |= IB_SMP_UNSUP_METHOD;
+ ret = reply((struct ib_smp *) ccp);
+ }
+
+bail:
+ return ret;
+}
+
/**
* qib_process_mad - process an incoming MAD packet
* @ibdev: the infiniband device this packet came in on
@@ -2071,6 +2375,8 @@
struct ib_mad *in_mad, struct ib_mad *out_mad)
{
int ret;
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
switch (in_mad->mad_hdr.mgmt_class) {
case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
@@ -2082,6 +2388,15 @@
ret = process_perf(ibdev, port, in_mad, out_mad);
goto bail;
+ case IB_MGMT_CLASS_CONG_MGMT:
+ if (!ppd->congestion_entries_shadow ||
+ !qib_cc_table_size) {
+ ret = IB_MAD_RESULT_SUCCESS;
+ goto bail;
+ }
+ ret = process_cc(ibdev, mad_flags, port, in_mad, out_mad);
+ goto bail;
+
default:
ret = IB_MAD_RESULT_SUCCESS;
}
diff --git a/drivers/infiniband/hw/qib/qib_mad.h b/drivers/infiniband/hw/qib/qib_mad.h
index ecc416c..57bd3fa 100644
--- a/drivers/infiniband/hw/qib/qib_mad.h
+++ b/drivers/infiniband/hw/qib/qib_mad.h
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
- * All rights reserved.
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -31,6 +31,8 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+#ifndef _QIB_MAD_H
+#define _QIB_MAD_H
#include <rdma/ib_pma.h>
@@ -223,6 +225,198 @@
#define IB_PMA_SEL_CONG_ROUTING 0x08
/*
+ * Congestion control class attributes
+ */
+#define IB_CC_ATTR_CLASSPORTINFO cpu_to_be16(0x0001)
+#define IB_CC_ATTR_NOTICE cpu_to_be16(0x0002)
+#define IB_CC_ATTR_CONGESTION_INFO cpu_to_be16(0x0011)
+#define IB_CC_ATTR_CONGESTION_KEY_INFO cpu_to_be16(0x0012)
+#define IB_CC_ATTR_CONGESTION_LOG cpu_to_be16(0x0013)
+#define IB_CC_ATTR_SWITCH_CONGESTION_SETTING cpu_to_be16(0x0014)
+#define IB_CC_ATTR_SWITCH_PORT_CONGESTION_SETTING cpu_to_be16(0x0015)
+#define IB_CC_ATTR_CA_CONGESTION_SETTING cpu_to_be16(0x0016)
+#define IB_CC_ATTR_CONGESTION_CONTROL_TABLE cpu_to_be16(0x0017)
+#define IB_CC_ATTR_TIME_STAMP cpu_to_be16(0x0018)
+
+/* generalizations for threshold values */
+#define IB_CC_THRESHOLD_NONE 0x0
+#define IB_CC_THRESHOLD_MIN 0x1
+#define IB_CC_THRESHOLD_MAX 0xf
+
+/* CCA MAD header constants */
+#define IB_CC_MAD_LOGDATA_LEN 32
+#define IB_CC_MAD_MGMTDATA_LEN 192
+
+struct ib_cc_mad {
+ u8 base_version;
+ u8 mgmt_class;
+ u8 class_version;
+ u8 method;
+ __be16 status;
+ __be16 class_specific;
+ __be64 tid;
+ __be16 attr_id;
+ __be16 resv;
+ __be32 attr_mod;
+ __be64 cckey;
+
+ /* For CongestionLog attribute only */
+ u8 log_data[IB_CC_MAD_LOGDATA_LEN];
+
+ u8 mgmt_data[IB_CC_MAD_MGMTDATA_LEN];
+} __packed;
+
+/*
+ * Congestion Control class portinfo capability mask bits
+ */
+#define IB_CC_CPI_CM_TRAP_GEN cpu_to_be16(1 << 0)
+#define IB_CC_CPI_CM_GET_SET_NOTICE cpu_to_be16(1 << 1)
+#define IB_CC_CPI_CM_CAP2 cpu_to_be16(1 << 2)
+#define IB_CC_CPI_CM_ENHANCEDPORT0_CC cpu_to_be16(1 << 8)
+
+struct ib_cc_classportinfo_attr {
+ u8 base_version;
+ u8 class_version;
+ __be16 cap_mask;
+ u8 reserved[3];
+ u8 resp_time_value; /* only lower 5 bits */
+ union ib_gid redirect_gid;
+ __be32 redirect_tc_sl_fl; /* 8, 4, 20 bits respectively */
+ __be16 redirect_lid;
+ __be16 redirect_pkey;
+ __be32 redirect_qp; /* only lower 24 bits */
+ __be32 redirect_qkey;
+ union ib_gid trap_gid;
+ __be32 trap_tc_sl_fl; /* 8, 4, 20 bits respectively */
+ __be16 trap_lid;
+ __be16 trap_pkey;
+ __be32 trap_hl_qp; /* 8, 24 bits respectively */
+ __be32 trap_qkey;
+} __packed;
+
+/* Congestion control traps */
+#define IB_CC_TRAP_KEY_VIOLATION 0x0000
+
+struct ib_cc_trap_key_violation_attr {
+ __be16 source_lid;
+ u8 method;
+ u8 reserved1;
+ __be16 attrib_id;
+ __be32 attrib_mod;
+ __be32 qp;
+ __be64 cckey;
+ u8 sgid[16];
+ u8 padding[24];
+} __packed;
+
+/* Congestion info flags */
+#define IB_CC_CI_FLAGS_CREDIT_STARVATION 0x1
+#define IB_CC_TABLE_CAP_DEFAULT 31
+
+struct ib_cc_info_attr {
+ __be16 congestion_info;
+ u8 control_table_cap; /* Multiple of 64 entry unit CCTs */
+} __packed;
+
+struct ib_cc_key_info_attr {
+ __be64 cckey;
+ u8 protect;
+ __be16 lease_period;
+ __be16 violations;
+} __packed;
+
+#define IB_CC_CL_CA_LOGEVENTS_LEN 208
+
+struct ib_cc_log_attr {
+ u8 log_type;
+ u8 congestion_flags;
+ __be16 threshold_event_counter;
+ __be16 threshold_congestion_event_map;
+ __be16 current_time_stamp;
+ u8 log_events[IB_CC_CL_CA_LOGEVENTS_LEN];
+} __packed;
+
+#define IB_CC_CLEC_SERVICETYPE_RC 0x0
+#define IB_CC_CLEC_SERVICETYPE_UC 0x1
+#define IB_CC_CLEC_SERVICETYPE_RD 0x2
+#define IB_CC_CLEC_SERVICETYPE_UD 0x3
+
+struct ib_cc_log_event {
+ u8 local_qp_cn_entry;
+ u8 remote_qp_number_cn_entry[3];
+ u8 sl_cn_entry:4;
+ u8 service_type_cn_entry:4;
+ __be32 remote_lid_cn_entry;
+ __be32 timestamp_cn_entry;
+} __packed;
+
+/* Sixteen congestion entries */
+#define IB_CC_CCS_ENTRIES 16
+
+/* Port control flags */
+#define IB_CC_CCS_PC_SL_BASED 0x01
+
+struct ib_cc_congestion_entry {
+ u8 ccti_increase;
+ __be16 ccti_timer;
+ u8 trigger_threshold;
+ u8 ccti_min; /* min CCTI for cc table */
+} __packed;
+
+struct ib_cc_congestion_entry_shadow {
+ u8 ccti_increase;
+ u16 ccti_timer;
+ u8 trigger_threshold;
+ u8 ccti_min; /* min CCTI for cc table */
+} __packed;
+
+struct ib_cc_congestion_setting_attr {
+ __be16 port_control;
+ __be16 control_map;
+ struct ib_cc_congestion_entry entries[IB_CC_CCS_ENTRIES];
+} __packed;
+
+struct ib_cc_congestion_setting_attr_shadow {
+ u16 port_control;
+ u16 control_map;
+ struct ib_cc_congestion_entry_shadow entries[IB_CC_CCS_ENTRIES];
+} __packed;
+
+#define IB_CC_TABLE_ENTRY_INCREASE_DEFAULT 1
+#define IB_CC_TABLE_ENTRY_TIMER_DEFAULT 1
+
+/* 64 Congestion Control table entries in a single MAD */
+#define IB_CCT_ENTRIES 64
+#define IB_CCT_MIN_ENTRIES (IB_CCT_ENTRIES * 2)
+
+struct ib_cc_table_entry {
+ __be16 entry; /* shift:2, multiplier:14 */
+};
+
+struct ib_cc_table_entry_shadow {
+ u16 entry; /* shift:2, multiplier:14 */
+};
+
+struct ib_cc_table_attr {
+ __be16 ccti_limit; /* max CCTI for cc table */
+ struct ib_cc_table_entry ccti_entries[IB_CCT_ENTRIES];
+} __packed;
+
+struct ib_cc_table_attr_shadow {
+ u16 ccti_limit; /* max CCTI for cc table */
+ struct ib_cc_table_entry_shadow ccti_entries[IB_CCT_ENTRIES];
+} __packed;
+
+#define CC_TABLE_SHADOW_MAX \
+ (IB_CC_TABLE_CAP_DEFAULT * IB_CCT_ENTRIES)
+
+struct cc_table_shadow {
+ u16 ccti_last_entry;
+ struct ib_cc_table_entry_shadow entries[CC_TABLE_SHADOW_MAX];
+} __packed;
+
+#endif /* _QIB_MAD_H */
+/*
* The PortSamplesControl.CounterMasks field is an array of 3 bit fields
* which specify the N'th counter's capabilities. See ch. 16.1.3.2.
* We support 5 counters which only count the mandatory quantities.
diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c
index dd9cd49..ae78305 100644
--- a/drivers/infiniband/hw/qib/qib_sysfs.c
+++ b/drivers/infiniband/hw/qib/qib_sysfs.c
@@ -1,5 +1,6 @@
/*
- * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -33,6 +34,7 @@
#include <linux/ctype.h>
#include "qib.h"
+#include "qib_mad.h"
/**
* qib_parse_ushort - parse an unsigned short value in an arbitrary base
@@ -231,6 +233,98 @@
NULL
};
+/*
+ * Start of per-port congestion control structures and support code
+ */
+
+/*
+ * Congestion control table size followed by table entries
+ */
+static ssize_t read_cc_table_bin(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buf, loff_t pos, size_t count)
+{
+ int ret;
+ struct qib_pportdata *ppd =
+ container_of(kobj, struct qib_pportdata, pport_cc_kobj);
+
+ if (!qib_cc_table_size || !ppd->ccti_entries_shadow)
+ return -EINVAL;
+
+ ret = ppd->total_cct_entry * sizeof(struct ib_cc_table_entry_shadow)
+ + sizeof(__be16);
+
+ if (pos > ret)
+ return -EINVAL;
+
+ if (count > ret - pos)
+ count = ret - pos;
+
+ if (!count)
+ return count;
+
+ spin_lock(&ppd->cc_shadow_lock);
+ memcpy(buf, ppd->ccti_entries_shadow, count);
+ spin_unlock(&ppd->cc_shadow_lock);
+
+ return count;
+}
+
+static void qib_port_release(struct kobject *kobj)
+{
+ /* nothing to do since memory is freed by qib_free_devdata() */
+}
+
+static struct kobj_type qib_port_cc_ktype = {
+ .release = qib_port_release,
+};
+
+static struct bin_attribute cc_table_bin_attr = {
+ .attr = {.name = "cc_table_bin", .mode = 0444},
+ .read = read_cc_table_bin,
+ .size = PAGE_SIZE,
+};
+
+/*
+ * Congestion settings: port control, control map and an array of 16
+ * entries for the congestion entries - increase, timer, event log
+ * trigger threshold and the minimum injection rate delay.
+ */
+static ssize_t read_cc_setting_bin(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buf, loff_t pos, size_t count)
+{
+ int ret;
+ struct qib_pportdata *ppd =
+ container_of(kobj, struct qib_pportdata, pport_cc_kobj);
+
+ if (!qib_cc_table_size || !ppd->congestion_entries_shadow)
+ return -EINVAL;
+
+ ret = sizeof(struct ib_cc_congestion_setting_attr_shadow);
+
+ if (pos > ret)
+ return -EINVAL;
+ if (count > ret - pos)
+ count = ret - pos;
+
+ if (!count)
+ return count;
+
+ spin_lock(&ppd->cc_shadow_lock);
+ memcpy(buf, ppd->congestion_entries_shadow, count);
+ spin_unlock(&ppd->cc_shadow_lock);
+
+ return count;
+}
+
+static struct bin_attribute cc_setting_bin_attr = {
+ .attr = {.name = "cc_settings_bin", .mode = 0444},
+ .read = read_cc_setting_bin,
+ .size = PAGE_SIZE,
+};
+
+
static ssize_t qib_portattr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
@@ -253,10 +347,6 @@
return pattr->store(ppd, buf, len);
}
-static void qib_port_release(struct kobject *kobj)
-{
- /* nothing to do since memory is freed by qib_free_devdata() */
-}
static const struct sysfs_ops qib_port_ops = {
.show = qib_portattr_show,
@@ -670,7 +760,7 @@
if (ret) {
qib_dev_err(dd, "Skipping sl2vl sysfs info, "
"(err %d) port %u\n", ret, port_num);
- goto bail_sl;
+ goto bail_link;
}
kobject_uevent(&ppd->sl2vl_kobj, KOBJ_ADD);
@@ -679,15 +769,57 @@
if (ret) {
qib_dev_err(dd, "Skipping diag_counters sysfs info, "
"(err %d) port %u\n", ret, port_num);
- goto bail_diagc;
+ goto bail_sl;
}
kobject_uevent(&ppd->diagc_kobj, KOBJ_ADD);
+ if (!qib_cc_table_size || !ppd->congestion_entries_shadow)
+ return 0;
+
+ ret = kobject_init_and_add(&ppd->pport_cc_kobj, &qib_port_cc_ktype,
+ kobj, "CCMgtA");
+ if (ret) {
+ qib_dev_err(dd,
+ "Skipping Congestion Control sysfs info, (err %d) port %u\n",
+ ret, port_num);
+ goto bail_diagc;
+ }
+
+ kobject_uevent(&ppd->pport_cc_kobj, KOBJ_ADD);
+
+ ret = sysfs_create_bin_file(&ppd->pport_cc_kobj,
+ &cc_setting_bin_attr);
+ if (ret) {
+ qib_dev_err(dd,
+ "Skipping Congestion Control setting sysfs info, (err %d) port %u\n",
+ ret, port_num);
+ goto bail_cc;
+ }
+
+ ret = sysfs_create_bin_file(&ppd->pport_cc_kobj,
+ &cc_table_bin_attr);
+ if (ret) {
+ qib_dev_err(dd,
+ "Skipping Congestion Control table sysfs info, (err %d) port %u\n",
+ ret, port_num);
+ goto bail_cc_entry_bin;
+ }
+
+ qib_devinfo(dd->pcidev,
+ "IB%u: Congestion Control Agent enabled for port %d\n",
+ dd->unit, port_num);
+
return 0;
+bail_cc_entry_bin:
+ sysfs_remove_bin_file(&ppd->pport_cc_kobj, &cc_setting_bin_attr);
+bail_cc:
+ kobject_put(&ppd->pport_cc_kobj);
bail_diagc:
- kobject_put(&ppd->sl2vl_kobj);
+ kobject_put(&ppd->diagc_kobj);
bail_sl:
+ kobject_put(&ppd->sl2vl_kobj);
+bail_link:
kobject_put(&ppd->pport_kobj);
bail:
return ret;
@@ -720,7 +852,15 @@
for (i = 0; i < dd->num_pports; i++) {
ppd = &dd->pport[i];
- kobject_put(&ppd->pport_kobj);
+ if (qib_cc_table_size &&
+ ppd->congestion_entries_shadow) {
+ sysfs_remove_bin_file(&ppd->pport_cc_kobj,
+ &cc_setting_bin_attr);
+ sysfs_remove_bin_file(&ppd->pport_cc_kobj,
+ &cc_table_bin_attr);
+ kobject_put(&ppd->pport_cc_kobj);
+ }
kobject_put(&ppd->sl2vl_kobj);
+ kobject_put(&ppd->pport_kobj);
}
}