Merge tag 'ccn/fixes-for-4.8-v2' of git://git.linaro.org/people/pawel.moll/linux into fixes

Merge "bus: ARM CCN PMU driver updates" from Paweł Moll:

- Fixes and improvements for XP watchpoint and events handling
- Added missing condition checks for KVM-related exclusions
- Improved interrupt affinity handling
- Fix for hrtimer use in polling mode
- Event grouping implementation improvement

* tag 'ccn/fixes-for-4.8-v2' of git://git.linaro.org/people/pawel.moll/linux:
  bus: arm-ccn: make event groups reliable
  bus: arm-ccn: fix hrtimer registration
  bus: arm-ccn: fix PMU interrupt flags
  bus: arm-ccn: Add missing event attribute exclusions for host/guest
  bus: arm-ccn: Correct required arguments for XP PMU events
  bus: arm-ccn: Fix XP watchpoint settings bitmask
  bus: arm-ccn: Do not attempt to configure XPs for cycle counter
  bus: arm-ccn: Fix PMU handling of MN
diff --git a/Documentation/arm/CCN.txt b/Documentation/arm/CCN.txt
index ffca443..15cdb7b 100644
--- a/Documentation/arm/CCN.txt
+++ b/Documentation/arm/CCN.txt
@@ -18,13 +18,17 @@
 directory provides configuration templates for all documented
 events, that can be used with perf tool. For example "xp_valid_flit"
 is an equivalent of "type=0x8,event=0x4". Other parameters must be
-explicitly specified. For events originating from device, "node"
-defines its index. All crosspoint events require "xp" (index),
-"port" (device port number) and "vc" (virtual channel ID) and
-"dir" (direction). Watchpoints (special "event" value 0xfe) also
-require comparator values ("cmp_l" and "cmp_h") and "mask", being
-index of the comparator mask.
+explicitly specified.
 
+For events originating from device, "node" defines its index.
+
+Crosspoint PMU events require "xp" (index), "bus" (bus number)
+and "vc" (virtual channel ID).
+
+Crosspoint watchpoint-based events (special "event" value 0xfe)
+require "xp" and "vc" as as above plus "port" (device port index),
+"dir" (transmit/receive direction), comparator values ("cmp_l"
+and "cmp_h") and "mask", being index of the comparator mask.
 Masks are defined separately from the event description
 (due to limited number of the config values) in the "cmp_mask"
 directory, with first 8 configurable by user and additional
diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c
index 97a9185..884c030 100644
--- a/drivers/bus/arm-ccn.c
+++ b/drivers/bus/arm-ccn.c
@@ -187,6 +187,7 @@
 	struct arm_ccn_component *xp;
 
 	struct arm_ccn_dt dt;
+	int mn_id;
 };
 
 static DEFINE_MUTEX(arm_ccn_mutex);
@@ -212,6 +213,7 @@
 #define CCN_CONFIG_TYPE(_config)	(((_config) >> 8) & 0xff)
 #define CCN_CONFIG_EVENT(_config)	(((_config) >> 16) & 0xff)
 #define CCN_CONFIG_PORT(_config)	(((_config) >> 24) & 0x3)
+#define CCN_CONFIG_BUS(_config)		(((_config) >> 24) & 0x3)
 #define CCN_CONFIG_VC(_config)		(((_config) >> 26) & 0x7)
 #define CCN_CONFIG_DIR(_config)		(((_config) >> 29) & 0x1)
 #define CCN_CONFIG_MASK(_config)	(((_config) >> 30) & 0xf)
@@ -241,6 +243,7 @@
 static CCN_FORMAT_ATTR(type, "config:8-15");
 static CCN_FORMAT_ATTR(event, "config:16-23");
 static CCN_FORMAT_ATTR(port, "config:24-25");
+static CCN_FORMAT_ATTR(bus, "config:24-25");
 static CCN_FORMAT_ATTR(vc, "config:26-28");
 static CCN_FORMAT_ATTR(dir, "config:29-29");
 static CCN_FORMAT_ATTR(mask, "config:30-33");
@@ -253,6 +256,7 @@
 	&arm_ccn_pmu_format_attr_type.attr.attr,
 	&arm_ccn_pmu_format_attr_event.attr.attr,
 	&arm_ccn_pmu_format_attr_port.attr.attr,
+	&arm_ccn_pmu_format_attr_bus.attr.attr,
 	&arm_ccn_pmu_format_attr_vc.attr.attr,
 	&arm_ccn_pmu_format_attr_dir.attr.attr,
 	&arm_ccn_pmu_format_attr_mask.attr.attr,
@@ -328,6 +332,7 @@
 static ssize_t arm_ccn_pmu_event_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
+	struct arm_ccn *ccn = pmu_to_arm_ccn(dev_get_drvdata(dev));
 	struct arm_ccn_pmu_event *event = container_of(attr,
 			struct arm_ccn_pmu_event, attr);
 	ssize_t res;
@@ -349,10 +354,17 @@
 		break;
 	case CCN_TYPE_XP:
 		res += snprintf(buf + res, PAGE_SIZE - res,
-				",xp=?,port=?,vc=?,dir=?");
+				",xp=?,vc=?");
 		if (event->event == CCN_EVENT_WATCHPOINT)
 			res += snprintf(buf + res, PAGE_SIZE - res,
-					",cmp_l=?,cmp_h=?,mask=?");
+					",port=?,dir=?,cmp_l=?,cmp_h=?,mask=?");
+		else
+			res += snprintf(buf + res, PAGE_SIZE - res,
+					",bus=?");
+
+		break;
+	case CCN_TYPE_MN:
+		res += snprintf(buf + res, PAGE_SIZE - res, ",node=%d", ccn->mn_id);
 		break;
 	default:
 		res += snprintf(buf + res, PAGE_SIZE - res, ",node=?");
@@ -383,9 +395,9 @@
 }
 
 static struct arm_ccn_pmu_event arm_ccn_pmu_events[] = {
-	CCN_EVENT_MN(eobarrier, "dir=0,vc=0,cmp_h=0x1c00", CCN_IDX_MASK_OPCODE),
-	CCN_EVENT_MN(ecbarrier, "dir=0,vc=0,cmp_h=0x1e00", CCN_IDX_MASK_OPCODE),
-	CCN_EVENT_MN(dvmop, "dir=0,vc=0,cmp_h=0x2800", CCN_IDX_MASK_OPCODE),
+	CCN_EVENT_MN(eobarrier, "dir=1,vc=0,cmp_h=0x1c00", CCN_IDX_MASK_OPCODE),
+	CCN_EVENT_MN(ecbarrier, "dir=1,vc=0,cmp_h=0x1e00", CCN_IDX_MASK_OPCODE),
+	CCN_EVENT_MN(dvmop, "dir=1,vc=0,cmp_h=0x2800", CCN_IDX_MASK_OPCODE),
 	CCN_EVENT_HNI(txdatflits, "dir=1,vc=3", CCN_IDX_MASK_ANY),
 	CCN_EVENT_HNI(rxdatflits, "dir=0,vc=3", CCN_IDX_MASK_ANY),
 	CCN_EVENT_HNI(txreqflits, "dir=1,vc=0", CCN_IDX_MASK_ANY),
@@ -733,9 +745,10 @@
 
 	if (has_branch_stack(event) || event->attr.exclude_user ||
 			event->attr.exclude_kernel || event->attr.exclude_hv ||
-			event->attr.exclude_idle) {
+			event->attr.exclude_idle || event->attr.exclude_host ||
+			event->attr.exclude_guest) {
 		dev_warn(ccn->dev, "Can't exclude execution levels!\n");
-		return -EOPNOTSUPP;
+		return -EINVAL;
 	}
 
 	if (event->cpu < 0) {
@@ -759,6 +772,12 @@
 
 	/* Validate node/xp vs topology */
 	switch (type) {
+	case CCN_TYPE_MN:
+		if (node_xp != ccn->mn_id) {
+			dev_warn(ccn->dev, "Invalid MN ID %d!\n", node_xp);
+			return -EINVAL;
+		}
+		break;
 	case CCN_TYPE_XP:
 		if (node_xp >= ccn->num_xps) {
 			dev_warn(ccn->dev, "Invalid XP ID %d!\n", node_xp);
@@ -886,6 +905,10 @@
 	struct arm_ccn_component *xp;
 	u32 val, dt_cfg;
 
+	/* Nothing to do for cycle counter */
+	if (hw->idx == CCN_IDX_PMU_CYCLE_COUNTER)
+		return;
+
 	if (CCN_CONFIG_TYPE(event->attr.config) == CCN_TYPE_XP)
 		xp = &ccn->xp[CCN_CONFIG_XP(event->attr.config)];
 	else
@@ -917,38 +940,17 @@
 			arm_ccn_pmu_read_counter(ccn, hw->idx));
 	hw->state = 0;
 
-	/*
-	 * Pin the timer, so that the overflows are handled by the chosen
-	 * event->cpu (this is the same one as presented in "cpumask"
-	 * attribute).
-	 */
-	if (!ccn->irq)
-		hrtimer_start(&ccn->dt.hrtimer, arm_ccn_pmu_timer_period(),
-				HRTIMER_MODE_REL_PINNED);
-
 	/* Set the DT bus input, engaging the counter */
 	arm_ccn_pmu_xp_dt_config(event, 1);
 }
 
 static void arm_ccn_pmu_event_stop(struct perf_event *event, int flags)
 {
-	struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu);
 	struct hw_perf_event *hw = &event->hw;
-	u64 timeout;
 
 	/* Disable counting, setting the DT bus to pass-through mode */
 	arm_ccn_pmu_xp_dt_config(event, 0);
 
-	if (!ccn->irq)
-		hrtimer_cancel(&ccn->dt.hrtimer);
-
-	/* Let the DT bus drain */
-	timeout = arm_ccn_pmu_read_counter(ccn, CCN_IDX_PMU_CYCLE_COUNTER) +
-			ccn->num_xps;
-	while (arm_ccn_pmu_read_counter(ccn, CCN_IDX_PMU_CYCLE_COUNTER) <
-			timeout)
-		cpu_relax();
-
 	if (flags & PERF_EF_UPDATE)
 		arm_ccn_pmu_event_update(event);
 
@@ -988,7 +990,7 @@
 
 	/* Comparison values */
 	writel(cmp_l & 0xffffffff, source->base + CCN_XP_DT_CMP_VAL_L(wp));
-	writel((cmp_l >> 32) & 0xefffffff,
+	writel((cmp_l >> 32) & 0x7fffffff,
 			source->base + CCN_XP_DT_CMP_VAL_L(wp) + 4);
 	writel(cmp_h & 0xffffffff, source->base + CCN_XP_DT_CMP_VAL_H(wp));
 	writel((cmp_h >> 32) & 0x0fffffff,
@@ -996,7 +998,7 @@
 
 	/* Mask */
 	writel(mask_l & 0xffffffff, source->base + CCN_XP_DT_CMP_MASK_L(wp));
-	writel((mask_l >> 32) & 0xefffffff,
+	writel((mask_l >> 32) & 0x7fffffff,
 			source->base + CCN_XP_DT_CMP_MASK_L(wp) + 4);
 	writel(mask_h & 0xffffffff, source->base + CCN_XP_DT_CMP_MASK_H(wp));
 	writel((mask_h >> 32) & 0x0fffffff,
@@ -1014,7 +1016,7 @@
 	hw->event_base = CCN_XP_DT_CONFIG__DT_CFG__XP_PMU_EVENT(hw->config_base);
 
 	id = (CCN_CONFIG_VC(event->attr.config) << 4) |
-			(CCN_CONFIG_PORT(event->attr.config) << 3) |
+			(CCN_CONFIG_BUS(event->attr.config) << 3) |
 			(CCN_CONFIG_EVENT(event->attr.config) << 0);
 
 	val = readl(source->base + CCN_XP_PMU_EVENT_SEL);
@@ -1099,15 +1101,31 @@
 	spin_unlock(&ccn->dt.config_lock);
 }
 
+static int arm_ccn_pmu_active_counters(struct arm_ccn *ccn)
+{
+	return bitmap_weight(ccn->dt.pmu_counters_mask,
+			     CCN_NUM_PMU_EVENT_COUNTERS + 1);
+}
+
 static int arm_ccn_pmu_event_add(struct perf_event *event, int flags)
 {
 	int err;
 	struct hw_perf_event *hw = &event->hw;
+	struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu);
 
 	err = arm_ccn_pmu_event_alloc(event);
 	if (err)
 		return err;
 
+	/*
+	 * Pin the timer, so that the overflows are handled by the chosen
+	 * event->cpu (this is the same one as presented in "cpumask"
+	 * attribute).
+	 */
+	if (!ccn->irq && arm_ccn_pmu_active_counters(ccn) == 1)
+		hrtimer_start(&ccn->dt.hrtimer, arm_ccn_pmu_timer_period(),
+			      HRTIMER_MODE_REL_PINNED);
+
 	arm_ccn_pmu_event_config(event);
 
 	hw->state = PERF_HES_STOPPED;
@@ -1120,9 +1138,14 @@
 
 static void arm_ccn_pmu_event_del(struct perf_event *event, int flags)
 {
+	struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu);
+
 	arm_ccn_pmu_event_stop(event, PERF_EF_UPDATE);
 
 	arm_ccn_pmu_event_release(event);
+
+	if (!ccn->irq && arm_ccn_pmu_active_counters(ccn) == 0)
+		hrtimer_cancel(&ccn->dt.hrtimer);
 }
 
 static void arm_ccn_pmu_event_read(struct perf_event *event)
@@ -1130,6 +1153,24 @@
 	arm_ccn_pmu_event_update(event);
 }
 
+static void arm_ccn_pmu_enable(struct pmu *pmu)
+{
+	struct arm_ccn *ccn = pmu_to_arm_ccn(pmu);
+
+	u32 val = readl(ccn->dt.base + CCN_DT_PMCR);
+	val |= CCN_DT_PMCR__PMU_EN;
+	writel(val, ccn->dt.base + CCN_DT_PMCR);
+}
+
+static void arm_ccn_pmu_disable(struct pmu *pmu)
+{
+	struct arm_ccn *ccn = pmu_to_arm_ccn(pmu);
+
+	u32 val = readl(ccn->dt.base + CCN_DT_PMCR);
+	val &= ~CCN_DT_PMCR__PMU_EN;
+	writel(val, ccn->dt.base + CCN_DT_PMCR);
+}
+
 static irqreturn_t arm_ccn_pmu_overflow_handler(struct arm_ccn_dt *dt)
 {
 	u32 pmovsr = readl(dt->base + CCN_DT_PMOVSR);
@@ -1252,6 +1293,8 @@
 		.start = arm_ccn_pmu_event_start,
 		.stop = arm_ccn_pmu_event_stop,
 		.read = arm_ccn_pmu_event_read,
+		.pmu_enable = arm_ccn_pmu_enable,
+		.pmu_disable = arm_ccn_pmu_disable,
 	};
 
 	/* No overflow interrupt? Have to use a timer instead. */
@@ -1361,6 +1404,8 @@
 
 	switch (type) {
 	case CCN_TYPE_MN:
+		ccn->mn_id = id;
+		return 0;
 	case CCN_TYPE_DT:
 		return 0;
 	case CCN_TYPE_XP:
@@ -1471,8 +1516,9 @@
 		/* Can set 'disable' bits, so can acknowledge interrupts */
 		writel(CCN_MN_ERRINT_STATUS__PMU_EVENTS__ENABLE,
 				ccn->base + CCN_MN_ERRINT_STATUS);
-		err = devm_request_irq(ccn->dev, irq, arm_ccn_irq_handler, 0,
-				dev_name(ccn->dev), ccn);
+		err = devm_request_irq(ccn->dev, irq, arm_ccn_irq_handler,
+				       IRQF_NOBALANCING | IRQF_NO_THREAD,
+				       dev_name(ccn->dev), ccn);
 		if (err)
 			return err;