Merge branches 'powercap', 'pm-clk', 'pm-config' and 'pm-opp'

* powercap:
  powercap / RAPL: fix build dependency on iosf_mbi
  powercap / RAPL: add new model ids
  powercap / RAPL: handle atom and core differences
  powercap / RAPL: abstract per cpu type functions

* pm-clk:
  PM / clock_ops: make __pm_clk_enable more generic
  PM / clock_ops: Add pm_clk_add_clk()

* pm-config:
  PM: Kconfig: fix unmet dependency for CPU_PM

* pm-opp:
  PM / OPP replace kfree_rcu() with call_srcu() in opp_set_availability()
  PM / OPP Introduce APIs to remove OPPs
  PM / OPP mark OPPs as 'static' or 'dynamic'
  PM / OPP don't match for existing OPPs when list is empty
  PM / OPP rename 'head' as 'rcu_head' or 'srcu_head' based on its type
diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c
index 7836930..b32b5d4 100644
--- a/drivers/base/power/clock_ops.c
+++ b/drivers/base/power/clock_ops.c
@@ -12,6 +12,7 @@
 #include <linux/pm.h>
 #include <linux/pm_clock.h>
 #include <linux/clk.h>
+#include <linux/clkdev.h>
 #include <linux/slab.h>
 #include <linux/err.h>
 
@@ -34,14 +35,20 @@
 /**
  * pm_clk_enable - Enable a clock, reporting any errors
  * @dev: The device for the given clock
- * @clk: The clock being enabled.
+ * @ce: PM clock entry corresponding to the clock.
  */
-static inline int __pm_clk_enable(struct device *dev, struct clk *clk)
+static inline int __pm_clk_enable(struct device *dev, struct pm_clock_entry *ce)
 {
-	int ret = clk_enable(clk);
-	if (ret)
-		dev_err(dev, "%s: failed to enable clk %p, error %d\n",
-			__func__, clk, ret);
+	int ret;
+
+	if (ce->status < PCE_STATUS_ERROR) {
+		ret = clk_enable(ce->clk);
+		if (!ret)
+			ce->status = PCE_STATUS_ENABLED;
+		else
+			dev_err(dev, "%s: failed to enable clk %p, error %d\n",
+				__func__, ce->clk, ret);
+	}
 
 	return ret;
 }
@@ -53,7 +60,8 @@
  */
 static void pm_clk_acquire(struct device *dev, struct pm_clock_entry *ce)
 {
-	ce->clk = clk_get(dev, ce->con_id);
+	if (!ce->clk)
+		ce->clk = clk_get(dev, ce->con_id);
 	if (IS_ERR(ce->clk)) {
 		ce->status = PCE_STATUS_ERROR;
 	} else {
@@ -63,15 +71,8 @@
 	}
 }
 
-/**
- * pm_clk_add - Start using a device clock for power management.
- * @dev: Device whose clock is going to be used for power management.
- * @con_id: Connection ID of the clock.
- *
- * Add the clock represented by @con_id to the list of clocks used for
- * the power management of @dev.
- */
-int pm_clk_add(struct device *dev, const char *con_id)
+static int __pm_clk_add(struct device *dev, const char *con_id,
+			struct clk *clk)
 {
 	struct pm_subsys_data *psd = dev_to_psd(dev);
 	struct pm_clock_entry *ce;
@@ -93,6 +94,12 @@
 			kfree(ce);
 			return -ENOMEM;
 		}
+	} else {
+		if (IS_ERR(ce->clk) || !__clk_get(clk)) {
+			kfree(ce);
+			return -ENOENT;
+		}
+		ce->clk = clk;
 	}
 
 	pm_clk_acquire(dev, ce);
@@ -104,6 +111,32 @@
 }
 
 /**
+ * pm_clk_add - Start using a device clock for power management.
+ * @dev: Device whose clock is going to be used for power management.
+ * @con_id: Connection ID of the clock.
+ *
+ * Add the clock represented by @con_id to the list of clocks used for
+ * the power management of @dev.
+ */
+int pm_clk_add(struct device *dev, const char *con_id)
+{
+	return __pm_clk_add(dev, con_id, NULL);
+}
+
+/**
+ * pm_clk_add_clk - Start using a device clock for power management.
+ * @dev: Device whose clock is going to be used for power management.
+ * @clk: Clock pointer
+ *
+ * Add the clock to the list of clocks used for the power management of @dev.
+ * It will increment refcount on clock pointer, use clk_put() on it when done.
+ */
+int pm_clk_add_clk(struct device *dev, struct clk *clk)
+{
+	return __pm_clk_add(dev, NULL, clk);
+}
+
+/**
  * __pm_clk_remove - Destroy PM clock entry.
  * @ce: PM clock entry to destroy.
  */
@@ -266,7 +299,6 @@
 	struct pm_subsys_data *psd = dev_to_psd(dev);
 	struct pm_clock_entry *ce;
 	unsigned long flags;
-	int ret;
 
 	dev_dbg(dev, "%s()\n", __func__);
 
@@ -275,13 +307,8 @@
 
 	spin_lock_irqsave(&psd->lock, flags);
 
-	list_for_each_entry(ce, &psd->clock_list, node) {
-		if (ce->status < PCE_STATUS_ERROR) {
-			ret = __pm_clk_enable(dev, ce->clk);
-			if (!ret)
-				ce->status = PCE_STATUS_ENABLED;
-		}
-	}
+	list_for_each_entry(ce, &psd->clock_list, node)
+		__pm_clk_enable(dev, ce);
 
 	spin_unlock_irqrestore(&psd->lock, flags);
 
@@ -390,7 +417,6 @@
 	struct pm_subsys_data *psd = dev_to_psd(dev);
 	struct pm_clock_entry *ce;
 	unsigned long flags;
-	int ret;
 
 	dev_dbg(dev, "%s()\n", __func__);
 
@@ -400,13 +426,8 @@
 
 	spin_lock_irqsave(&psd->lock, flags);
 
-	list_for_each_entry(ce, &psd->clock_list, node) {
-		if (ce->status < PCE_STATUS_ERROR) {
-			ret = __pm_clk_enable(dev, ce->clk);
-			if (!ret)
-				ce->status = PCE_STATUS_ENABLED;
-		}
-	}
+	list_for_each_entry(ce, &psd->clock_list, node)
+		__pm_clk_enable(dev, ce);
 
 	spin_unlock_irqrestore(&psd->lock, flags);
 
diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c
index 89ced95..2d195f3 100644
--- a/drivers/base/power/opp.c
+++ b/drivers/base/power/opp.c
@@ -49,11 +49,12 @@
  *		are protected by the dev_opp_list_lock for integrity.
  *		IMPORTANT: the opp nodes should be maintained in increasing
  *		order.
+ * @dynamic:	not-created from static DT entries.
  * @available:	true/false - marks if this OPP as available or not
  * @rate:	Frequency in hertz
  * @u_volt:	Nominal voltage in microvolts corresponding to this OPP
  * @dev_opp:	points back to the device_opp struct this opp belongs to
- * @head:	RCU callback head used for deferred freeing
+ * @rcu_head:	RCU callback head used for deferred freeing
  *
  * This structure stores the OPP information for a given device.
  */
@@ -61,11 +62,12 @@
 	struct list_head node;
 
 	bool available;
+	bool dynamic;
 	unsigned long rate;
 	unsigned long u_volt;
 
 	struct device_opp *dev_opp;
-	struct rcu_head head;
+	struct rcu_head rcu_head;
 };
 
 /**
@@ -76,7 +78,8 @@
  *		RCU usage: nodes are not modified in the list of device_opp,
  *		however addition is possible and is secured by dev_opp_list_lock
  * @dev:	device pointer
- * @head:	notifier head to notify the OPP availability changes.
+ * @srcu_head:	notifier head to notify the OPP availability changes.
+ * @rcu_head:	RCU callback head used for deferred freeing
  * @opp_list:	list of opps
  *
  * This is an internal data structure maintaining the link to opps attached to
@@ -87,7 +90,8 @@
 	struct list_head node;
 
 	struct device *dev;
-	struct srcu_notifier_head head;
+	struct srcu_notifier_head srcu_head;
+	struct rcu_head rcu_head;
 	struct list_head opp_list;
 };
 
@@ -378,30 +382,8 @@
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor);
 
-/**
- * dev_pm_opp_add()  - Add an OPP table from a table definitions
- * @dev:	device for which we do this operation
- * @freq:	Frequency in Hz for this OPP
- * @u_volt:	Voltage in uVolts for this OPP
- *
- * This function adds an opp definition to the opp list and returns status.
- * The opp is made available by default and it can be controlled using
- * dev_pm_opp_enable/disable functions.
- *
- * Locking: The internal device_opp and opp structures are RCU protected.
- * Hence this function internally uses RCU updater strategy with mutex locks
- * to keep the integrity of the internal data structures. Callers should ensure
- * that this function is *NOT* called under RCU protection or in contexts where
- * mutex cannot be locked.
- *
- * Return:
- * 0:		On success OR
- *		Duplicate OPPs (both freq and volt are same) and opp->available
- * -EEXIST:	Freq are same and volt are different OR
- *		Duplicate OPPs (both freq and volt are same) and !opp->available
- * -ENOMEM:	Memory allocation failure
- */
-int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt)
+static int dev_pm_opp_add_dynamic(struct device *dev, unsigned long freq,
+				  unsigned long u_volt, bool dynamic)
 {
 	struct device_opp *dev_opp = NULL;
 	struct dev_pm_opp *opp, *new_opp;
@@ -417,6 +399,13 @@
 	/* Hold our list modification lock here */
 	mutex_lock(&dev_opp_list_lock);
 
+	/* populate the opp table */
+	new_opp->dev_opp = dev_opp;
+	new_opp->rate = freq;
+	new_opp->u_volt = u_volt;
+	new_opp->available = true;
+	new_opp->dynamic = dynamic;
+
 	/* Check for existing list for 'dev' */
 	dev_opp = find_device_opp(dev);
 	if (IS_ERR(dev_opp)) {
@@ -436,19 +425,15 @@
 		}
 
 		dev_opp->dev = dev;
-		srcu_init_notifier_head(&dev_opp->head);
+		srcu_init_notifier_head(&dev_opp->srcu_head);
 		INIT_LIST_HEAD(&dev_opp->opp_list);
 
 		/* Secure the device list modification */
 		list_add_rcu(&dev_opp->node, &dev_opp_list);
+		head = &dev_opp->opp_list;
+		goto list_add;
 	}
 
-	/* populate the opp table */
-	new_opp->dev_opp = dev_opp;
-	new_opp->rate = freq;
-	new_opp->u_volt = u_volt;
-	new_opp->available = true;
-
 	/*
 	 * Insert new OPP in order of increasing frequency
 	 * and discard if already present
@@ -474,6 +459,7 @@
 		return ret;
 	}
 
+list_add:
 	list_add_rcu(&new_opp->node, head);
 	mutex_unlock(&dev_opp_list_lock);
 
@@ -481,11 +467,109 @@
 	 * Notify the changes in the availability of the operable
 	 * frequency/voltage list.
 	 */
-	srcu_notifier_call_chain(&dev_opp->head, OPP_EVENT_ADD, new_opp);
+	srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_ADD, new_opp);
 	return 0;
 }
+
+/**
+ * dev_pm_opp_add()  - Add an OPP table from a table definitions
+ * @dev:	device for which we do this operation
+ * @freq:	Frequency in Hz for this OPP
+ * @u_volt:	Voltage in uVolts for this OPP
+ *
+ * This function adds an opp definition to the opp list and returns status.
+ * The opp is made available by default and it can be controlled using
+ * dev_pm_opp_enable/disable functions.
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ *
+ * Return:
+ * 0:		On success OR
+ *		Duplicate OPPs (both freq and volt are same) and opp->available
+ * -EEXIST:	Freq are same and volt are different OR
+ *		Duplicate OPPs (both freq and volt are same) and !opp->available
+ * -ENOMEM:	Memory allocation failure
+ */
+int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt)
+{
+	return dev_pm_opp_add_dynamic(dev, freq, u_volt, true);
+}
 EXPORT_SYMBOL_GPL(dev_pm_opp_add);
 
+static void kfree_opp_rcu(struct rcu_head *head)
+{
+	struct dev_pm_opp *opp = container_of(head, struct dev_pm_opp, rcu_head);
+
+	kfree_rcu(opp, rcu_head);
+}
+
+static void kfree_device_rcu(struct rcu_head *head)
+{
+	struct device_opp *device_opp = container_of(head, struct device_opp, rcu_head);
+
+	kfree(device_opp);
+}
+
+void __dev_pm_opp_remove(struct device_opp *dev_opp, struct dev_pm_opp *opp)
+{
+	/*
+	 * Notify the changes in the availability of the operable
+	 * frequency/voltage list.
+	 */
+	srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_REMOVE, opp);
+	list_del_rcu(&opp->node);
+	call_srcu(&dev_opp->srcu_head.srcu, &opp->rcu_head, kfree_opp_rcu);
+
+	if (list_empty(&dev_opp->opp_list)) {
+		list_del_rcu(&dev_opp->node);
+		call_srcu(&dev_opp->srcu_head.srcu, &dev_opp->rcu_head,
+			  kfree_device_rcu);
+	}
+}
+
+/**
+ * dev_pm_opp_remove()  - Remove an OPP from OPP list
+ * @dev:	device for which we do this operation
+ * @freq:	OPP to remove with matching 'freq'
+ *
+ * This function removes an opp from the opp list.
+ */
+void dev_pm_opp_remove(struct device *dev, unsigned long freq)
+{
+	struct dev_pm_opp *opp;
+	struct device_opp *dev_opp;
+	bool found = false;
+
+	/* Hold our list modification lock here */
+	mutex_lock(&dev_opp_list_lock);
+
+	dev_opp = find_device_opp(dev);
+	if (IS_ERR(dev_opp))
+		goto unlock;
+
+	list_for_each_entry(opp, &dev_opp->opp_list, node) {
+		if (opp->rate == freq) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found) {
+		dev_warn(dev, "%s: Couldn't find OPP with freq: %lu\n",
+			 __func__, freq);
+		goto unlock;
+	}
+
+	__dev_pm_opp_remove(dev_opp, opp);
+unlock:
+	mutex_unlock(&dev_opp_list_lock);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_remove);
+
 /**
  * opp_set_availability() - helper to set the availability of an opp
  * @dev:		device for which we do this operation
@@ -557,14 +641,14 @@
 
 	list_replace_rcu(&opp->node, &new_opp->node);
 	mutex_unlock(&dev_opp_list_lock);
-	kfree_rcu(opp, head);
+	call_srcu(&dev_opp->srcu_head.srcu, &opp->rcu_head, kfree_opp_rcu);
 
 	/* Notify the change of the OPP availability */
 	if (availability_req)
-		srcu_notifier_call_chain(&dev_opp->head, OPP_EVENT_ENABLE,
+		srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_ENABLE,
 					 new_opp);
 	else
-		srcu_notifier_call_chain(&dev_opp->head, OPP_EVENT_DISABLE,
+		srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_DISABLE,
 					 new_opp);
 
 	return 0;
@@ -629,7 +713,7 @@
 	if (IS_ERR(dev_opp))
 		return ERR_CAST(dev_opp); /* matching type */
 
-	return &dev_opp->head;
+	return &dev_opp->srcu_head;
 }
 
 #ifdef CONFIG_OF
@@ -666,7 +750,7 @@
 		unsigned long freq = be32_to_cpup(val++) * 1000;
 		unsigned long volt = be32_to_cpup(val++);
 
-		if (dev_pm_opp_add(dev, freq, volt))
+		if (dev_pm_opp_add_dynamic(dev, freq, volt, false))
 			dev_warn(dev, "%s: Failed to add OPP %ld\n",
 				 __func__, freq);
 		nr -= 2;
@@ -675,4 +759,34 @@
 	return 0;
 }
 EXPORT_SYMBOL_GPL(of_init_opp_table);
+
+/**
+ * of_free_opp_table() - Free OPP table entries created from static DT entries
+ * @dev:	device pointer used to lookup device OPPs.
+ *
+ * Free OPPs created using static entries present in DT.
+ */
+void of_free_opp_table(struct device *dev)
+{
+	struct device_opp *dev_opp = find_device_opp(dev);
+	struct dev_pm_opp *opp, *tmp;
+
+	/* Check for existing list for 'dev' */
+	dev_opp = find_device_opp(dev);
+	if (WARN(IS_ERR(dev_opp), "%s: dev_opp: %ld\n", dev_name(dev),
+		 PTR_ERR(dev_opp)))
+		return;
+
+	/* Hold our list modification lock here */
+	mutex_lock(&dev_opp_list_lock);
+
+	/* Free static OPPs */
+	list_for_each_entry_safe(opp, tmp, &dev_opp->opp_list, node) {
+		if (!opp->dynamic)
+			__dev_pm_opp_remove(dev_opp, opp);
+	}
+
+	mutex_unlock(&dev_opp_list_lock);
+}
+EXPORT_SYMBOL_GPL(of_free_opp_table);
 #endif
diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig
index a7c81b5..85727ef 100644
--- a/drivers/powercap/Kconfig
+++ b/drivers/powercap/Kconfig
@@ -17,7 +17,7 @@
 # Client driver configurations go here.
 config INTEL_RAPL
 	tristate "Intel RAPL Support"
-	depends on X86
+	depends on X86 && IOSF_MBI
 	default n
 	---help---
 	  This enables support for the Intel Running Average Power Limit (RAPL)
diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c
index 45e05b3..c71443c 100644
--- a/drivers/powercap/intel_rapl.c
+++ b/drivers/powercap/intel_rapl.c
@@ -29,6 +29,7 @@
 #include <linux/sysfs.h>
 #include <linux/cpu.h>
 #include <linux/powercap.h>
+#include <asm/iosf_mbi.h>
 
 #include <asm/processor.h>
 #include <asm/cpu_device_id.h>
@@ -70,11 +71,6 @@
 #define RAPL_PRIMITIVE_DERIVED       BIT(1) /* not from raw data */
 #define RAPL_PRIMITIVE_DUMMY         BIT(2)
 
-/* scale RAPL units to avoid floating point math inside kernel */
-#define POWER_UNIT_SCALE     (1000000)
-#define ENERGY_UNIT_SCALE    (1000000)
-#define TIME_UNIT_SCALE      (1000000)
-
 #define TIME_WINDOW_MAX_MSEC 40000
 #define TIME_WINDOW_MIN_MSEC 250
 
@@ -175,9 +171,9 @@
 	unsigned int id; /* physical package/socket id */
 	unsigned int nr_domains;
 	unsigned long domain_map; /* bit map of active domains */
-	unsigned int power_unit_divisor;
-	unsigned int energy_unit_divisor;
-	unsigned int time_unit_divisor;
+	unsigned int power_unit;
+	unsigned int energy_unit;
+	unsigned int time_unit;
 	struct rapl_domain *domains; /* array of domains, sized at runtime */
 	struct powercap_zone *power_zone; /* keep track of parent zone */
 	int nr_cpus; /* active cpus on the package, topology info is lost during
@@ -188,6 +184,18 @@
 					*/
 	struct list_head plist;
 };
+
+struct rapl_defaults {
+	int (*check_unit)(struct rapl_package *rp, int cpu);
+	void (*set_floor_freq)(struct rapl_domain *rd, bool mode);
+	u64 (*compute_time_window)(struct rapl_package *rp, u64 val,
+				bool to_raw);
+};
+static struct rapl_defaults *rapl_defaults;
+
+/* Sideband MBI registers */
+#define IOSF_CPU_POWER_BUDGET_CTL (0x2)
+
 #define PACKAGE_PLN_INT_SAVED   BIT(0)
 #define MAX_PRIM_NAME (32)
 
@@ -339,23 +347,13 @@
 static int set_domain_enable(struct powercap_zone *power_zone, bool mode)
 {
 	struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
-	int nr_powerlimit;
 
 	if (rd->state & DOMAIN_STATE_BIOS_LOCKED)
 		return -EACCES;
+
 	get_online_cpus();
-	nr_powerlimit = find_nr_power_limit(rd);
-	/* here we activate/deactivate the hardware for power limiting */
 	rapl_write_data_raw(rd, PL1_ENABLE, mode);
-	/* always enable clamp such that p-state can go below OS requested
-	 * range. power capping priority over guranteed frequency.
-	 */
-	rapl_write_data_raw(rd, PL1_CLAMP, mode);
-	/* some domains have pl2 */
-	if (nr_powerlimit > 1) {
-		rapl_write_data_raw(rd, PL2_ENABLE, mode);
-		rapl_write_data_raw(rd, PL2_CLAMP, mode);
-	}
+	rapl_defaults->set_floor_freq(rd, mode);
 	put_online_cpus();
 
 	return 0;
@@ -653,9 +651,7 @@
 static u64 rapl_unit_xlate(int package, enum unit_type type, u64 value,
 			int to_raw)
 {
-	u64 divisor = 1;
-	int scale = 1; /* scale to user friendly data without floating point */
-	u64 f, y; /* fraction and exp. used for time unit */
+	u64 units = 1;
 	struct rapl_package *rp;
 
 	rp = find_package_by_id(package);
@@ -664,42 +660,24 @@
 
 	switch (type) {
 	case POWER_UNIT:
-		divisor = rp->power_unit_divisor;
-		scale = POWER_UNIT_SCALE;
+		units = rp->power_unit;
 		break;
 	case ENERGY_UNIT:
-		scale = ENERGY_UNIT_SCALE;
-		divisor = rp->energy_unit_divisor;
+		units = rp->energy_unit;
 		break;
 	case TIME_UNIT:
-		divisor = rp->time_unit_divisor;
-		scale = TIME_UNIT_SCALE;
-		/* special processing based on 2^Y*(1+F)/4 = val/divisor, refer
-		 * to Intel Software Developer's manual Vol. 3a, CH 14.7.4.
-		 */
-		if (!to_raw) {
-			f = (value & 0x60) >> 5;
-			y = value & 0x1f;
-			value = (1 << y) * (4 + f) * scale / 4;
-			return div64_u64(value, divisor);
-		} else {
-			do_div(value, scale);
-			value *= divisor;
-			y = ilog2(value);
-			f = div64_u64(4 * (value - (1 << y)), 1 << y);
-			value = (y & 0x1f) | ((f & 0x3) << 5);
-			return value;
-		}
-		break;
+		return rapl_defaults->compute_time_window(rp, value, to_raw);
 	case ARBITRARY_UNIT:
 	default:
 		return value;
 	};
 
 	if (to_raw)
-		return div64_u64(value * divisor, scale);
-	else
-		return div64_u64(value * scale, divisor);
+		return div64_u64(value, units);
+
+	value *= units;
+
+	return value;
 }
 
 /* in the order of enum rapl_primitives */
@@ -833,12 +811,18 @@
 	return 0;
 }
 
-static const struct x86_cpu_id energy_unit_quirk_ids[] = {
-	{ X86_VENDOR_INTEL, 6, 0x37},/* Valleyview */
-	{}
-};
-
-static int rapl_check_unit(struct rapl_package *rp, int cpu)
+/*
+ * Raw RAPL data stored in MSRs are in certain scales. We need to
+ * convert them into standard units based on the units reported in
+ * the RAPL unit MSRs. This is specific to CPUs as the method to
+ * calculate units differ on different CPUs.
+ * We convert the units to below format based on CPUs.
+ * i.e.
+ * energy unit: microJoules : Represented in microJoules by default
+ * power unit : microWatts  : Represented in milliWatts by default
+ * time unit  : microseconds: Represented in seconds by default
+ */
+static int rapl_check_unit_core(struct rapl_package *rp, int cpu)
 {
 	u64 msr_val;
 	u32 value;
@@ -849,36 +833,47 @@
 		return -ENODEV;
 	}
 
-	/* Raw RAPL data stored in MSRs are in certain scales. We need to
-	 * convert them into standard units based on the divisors reported in
-	 * the RAPL unit MSRs.
-	 * i.e.
-	 * energy unit: 1/enery_unit_divisor Joules
-	 * power unit: 1/power_unit_divisor Watts
-	 * time unit: 1/time_unit_divisor Seconds
-	 */
 	value = (msr_val & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
-	/* some CPUs have different way to calculate energy unit */
-	if (x86_match_cpu(energy_unit_quirk_ids))
-		rp->energy_unit_divisor = 1000000 / (1 << value);
-	else
-		rp->energy_unit_divisor = 1 << value;
+	rp->energy_unit = 1000000 / (1 << value);
 
 	value = (msr_val & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET;
-	rp->power_unit_divisor = 1 << value;
+	rp->power_unit = 1000000 / (1 << value);
 
 	value = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
-	rp->time_unit_divisor = 1 << value;
+	rp->time_unit = 1000000 / (1 << value);
 
-	pr_debug("Physical package %d units: energy=%d, time=%d, power=%d\n",
-		rp->id,
-		rp->energy_unit_divisor,
-		rp->time_unit_divisor,
-		rp->power_unit_divisor);
+	pr_debug("Core CPU package %d energy=%duJ, time=%dus, power=%duW\n",
+		rp->id, rp->energy_unit, rp->time_unit, rp->power_unit);
 
 	return 0;
 }
 
+static int rapl_check_unit_atom(struct rapl_package *rp, int cpu)
+{
+	u64 msr_val;
+	u32 value;
+
+	if (rdmsrl_safe_on_cpu(cpu, MSR_RAPL_POWER_UNIT, &msr_val)) {
+		pr_err("Failed to read power unit MSR 0x%x on CPU %d, exit.\n",
+			MSR_RAPL_POWER_UNIT, cpu);
+		return -ENODEV;
+	}
+	value = (msr_val & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
+	rp->energy_unit = 1 << value;
+
+	value = (msr_val & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET;
+	rp->power_unit = (1 << value) * 1000;
+
+	value = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
+	rp->time_unit = 1000000 / (1 << value);
+
+	pr_debug("Atom package %d energy=%duJ, time=%dus, power=%duW\n",
+		rp->id, rp->energy_unit, rp->time_unit, rp->power_unit);
+
+	return 0;
+}
+
+
 /* REVISIT:
  * When package power limit is set artificially low by RAPL, LVT
  * thermal interrupt for package power limit should be ignored
@@ -946,16 +941,107 @@
 	wrmsr_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
 }
 
+static void set_floor_freq_default(struct rapl_domain *rd, bool mode)
+{
+	int nr_powerlimit = find_nr_power_limit(rd);
+
+	/* always enable clamp such that p-state can go below OS requested
+	 * range. power capping priority over guranteed frequency.
+	 */
+	rapl_write_data_raw(rd, PL1_CLAMP, mode);
+
+	/* some domains have pl2 */
+	if (nr_powerlimit > 1) {
+		rapl_write_data_raw(rd, PL2_ENABLE, mode);
+		rapl_write_data_raw(rd, PL2_CLAMP, mode);
+	}
+}
+
+static void set_floor_freq_atom(struct rapl_domain *rd, bool enable)
+{
+	static u32 power_ctrl_orig_val;
+	u32 mdata;
+
+	if (!power_ctrl_orig_val)
+		iosf_mbi_read(BT_MBI_UNIT_PMC, BT_MBI_PMC_READ,
+			IOSF_CPU_POWER_BUDGET_CTL, &power_ctrl_orig_val);
+	mdata = power_ctrl_orig_val;
+	if (enable) {
+		mdata &= ~(0x7f << 8);
+		mdata |= 1 << 8;
+	}
+	iosf_mbi_write(BT_MBI_UNIT_PMC, BT_MBI_PMC_WRITE,
+		IOSF_CPU_POWER_BUDGET_CTL, mdata);
+}
+
+static u64 rapl_compute_time_window_core(struct rapl_package *rp, u64 value,
+					bool to_raw)
+{
+	u64 f, y; /* fraction and exp. used for time unit */
+
+	/*
+	 * Special processing based on 2^Y*(1+F/4), refer
+	 * to Intel Software Developer's manual Vol.3B: CH 14.9.3.
+	 */
+	if (!to_raw) {
+		f = (value & 0x60) >> 5;
+		y = value & 0x1f;
+		value = (1 << y) * (4 + f) * rp->time_unit / 4;
+	} else {
+		do_div(value, rp->time_unit);
+		y = ilog2(value);
+		f = div64_u64(4 * (value - (1 << y)), 1 << y);
+		value = (y & 0x1f) | ((f & 0x3) << 5);
+	}
+	return value;
+}
+
+static u64 rapl_compute_time_window_atom(struct rapl_package *rp, u64 value,
+					bool to_raw)
+{
+	/*
+	 * Atom time unit encoding is straight forward val * time_unit,
+	 * where time_unit is default to 1 sec. Never 0.
+	 */
+	if (!to_raw)
+		return (value) ? value *= rp->time_unit : rp->time_unit;
+	else
+		value = div64_u64(value, rp->time_unit);
+
+	return value;
+}
+
+static const struct rapl_defaults rapl_defaults_core = {
+	.check_unit = rapl_check_unit_core,
+	.set_floor_freq = set_floor_freq_default,
+	.compute_time_window = rapl_compute_time_window_core,
+};
+
+static const struct rapl_defaults rapl_defaults_atom = {
+	.check_unit = rapl_check_unit_atom,
+	.set_floor_freq = set_floor_freq_atom,
+	.compute_time_window = rapl_compute_time_window_atom,
+};
+
+#define RAPL_CPU(_model, _ops) {			\
+		.vendor = X86_VENDOR_INTEL,		\
+		.family = 6,				\
+		.model = _model,			\
+		.driver_data = (kernel_ulong_t)&_ops,	\
+		}
+
 static const struct x86_cpu_id rapl_ids[] = {
-	{ X86_VENDOR_INTEL, 6, 0x2a},/* Sandy Bridge */
-	{ X86_VENDOR_INTEL, 6, 0x2d},/* Sandy Bridge EP */
-	{ X86_VENDOR_INTEL, 6, 0x37},/* Valleyview */
-	{ X86_VENDOR_INTEL, 6, 0x3a},/* Ivy Bridge */
-	{ X86_VENDOR_INTEL, 6, 0x3c},/* Haswell */
-	{ X86_VENDOR_INTEL, 6, 0x3d},/* Broadwell */
-	{ X86_VENDOR_INTEL, 6, 0x3f},/* Haswell */
-	{ X86_VENDOR_INTEL, 6, 0x45},/* Haswell ULT */
-	/* TODO: Add more CPU IDs after testing */
+	RAPL_CPU(0x2a, rapl_defaults_core),/* Sandy Bridge */
+	RAPL_CPU(0x2d, rapl_defaults_core),/* Sandy Bridge EP */
+	RAPL_CPU(0x37, rapl_defaults_atom),/* Valleyview */
+	RAPL_CPU(0x3a, rapl_defaults_core),/* Ivy Bridge */
+	RAPL_CPU(0x3c, rapl_defaults_core),/* Haswell */
+	RAPL_CPU(0x3d, rapl_defaults_core),/* Broadwell */
+	RAPL_CPU(0x3f, rapl_defaults_core),/* Haswell */
+	RAPL_CPU(0x45, rapl_defaults_core),/* Haswell ULT */
+	RAPL_CPU(0x4C, rapl_defaults_atom),/* Braswell */
+	RAPL_CPU(0x4A, rapl_defaults_atom),/* Tangier */
+	RAPL_CPU(0x5A, rapl_defaults_atom),/* Annidale */
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, rapl_ids);
@@ -1241,7 +1327,7 @@
 
 			/* check if the package contains valid domains */
 			if (rapl_detect_domains(new_package, i) ||
-				rapl_check_unit(new_package, i)) {
+				rapl_defaults->check_unit(new_package, i)) {
 				kfree(new_package->domains);
 				kfree(new_package);
 				/* free up the packages already initialized */
@@ -1296,7 +1382,7 @@
 	rp->nr_cpus = 1;
 	/* check if the package contains valid domains */
 	if (rapl_detect_domains(rp, cpu) ||
-		rapl_check_unit(rp, cpu)) {
+		rapl_defaults->check_unit(rp, cpu)) {
 		ret = -ENODEV;
 		goto err_free_package;
 	}
@@ -1358,14 +1444,18 @@
 static int __init rapl_init(void)
 {
 	int ret = 0;
+	const struct x86_cpu_id *id;
 
-	if (!x86_match_cpu(rapl_ids)) {
+	id = x86_match_cpu(rapl_ids);
+	if (!id) {
 		pr_err("driver does not support CPU family %d model %d\n",
 			boot_cpu_data.x86, boot_cpu_data.x86_model);
 
 		return -ENODEV;
 	}
 
+	rapl_defaults = (struct rapl_defaults *)id->driver_data;
+
 	cpu_notifier_register_begin();
 
 	/* prevent CPU hotplug during detection */
diff --git a/include/linux/pm_clock.h b/include/linux/pm_clock.h
index 8348866..0b00396 100644
--- a/include/linux/pm_clock.h
+++ b/include/linux/pm_clock.h
@@ -18,6 +18,8 @@
 	char *con_ids[];
 };
 
+struct clk;
+
 #ifdef CONFIG_PM_CLK
 static inline bool pm_clk_no_clocks(struct device *dev)
 {
@@ -29,6 +31,7 @@
 extern int pm_clk_create(struct device *dev);
 extern void pm_clk_destroy(struct device *dev);
 extern int pm_clk_add(struct device *dev, const char *con_id);
+extern int pm_clk_add_clk(struct device *dev, struct clk *clk);
 extern void pm_clk_remove(struct device *dev, const char *con_id);
 extern int pm_clk_suspend(struct device *dev);
 extern int pm_clk_resume(struct device *dev);
@@ -51,6 +54,11 @@
 {
 	return -EINVAL;
 }
+
+static inline int pm_clk_add_clk(struct device *dev, struct clk *clk)
+{
+	return -EINVAL;
+}
 static inline void pm_clk_remove(struct device *dev, const char *con_id)
 {
 }
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 0330217..cec2d45 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -21,7 +21,7 @@
 struct device;
 
 enum dev_pm_opp_event {
-	OPP_EVENT_ADD, OPP_EVENT_ENABLE, OPP_EVENT_DISABLE,
+	OPP_EVENT_ADD, OPP_EVENT_REMOVE, OPP_EVENT_ENABLE, OPP_EVENT_DISABLE,
 };
 
 #if defined(CONFIG_PM_OPP)
@@ -44,6 +44,7 @@
 
 int dev_pm_opp_add(struct device *dev, unsigned long freq,
 		   unsigned long u_volt);
+void dev_pm_opp_remove(struct device *dev, unsigned long freq);
 
 int dev_pm_opp_enable(struct device *dev, unsigned long freq);
 
@@ -90,6 +91,10 @@
 	return -EINVAL;
 }
 
+static inline void dev_pm_opp_remove(struct device *dev, unsigned long freq)
+{
+}
+
 static inline int dev_pm_opp_enable(struct device *dev, unsigned long freq)
 {
 	return 0;
@@ -109,11 +114,16 @@
 
 #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF)
 int of_init_opp_table(struct device *dev);
+void of_free_opp_table(struct device *dev);
 #else
 static inline int of_init_opp_table(struct device *dev)
 {
 	return -EINVAL;
 }
+
+static inline void of_free_opp_table(struct device *dev)
+{
+}
 #endif
 
 #endif		/* __LINUX_OPP_H__ */
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index bbef57f..1eb7da7 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -308,4 +308,3 @@
 
 config CPU_PM
 	bool
-	depends on SUSPEND || CPU_IDLE