/*
 * intel_powerclamp.c - package c-state idle injection
 *
 * Copyright (c) 2012, Intel Corporation.
 *
 * Authors:
 *	Arjan van de Ven <arjan@linux.intel.com>
 *	Jacob Pan <jacob.jun.pan@linux.intel.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 *
 *
 * TODO:
 *	1. better handle wakeup from external interrupts. currently, a fixed
 *	   compensation is added to the clamping duration when an excessive
 *	   number of wakeups is observed during idle time. the reason is that
 *	   in the case of external interrupts that need no ack, clamping down
 *	   the cpu in non-irq context does not reduce the irq rate. for the
 *	   majority of cases, clamping down the cpu does help reduce irqs as
 *	   well; we should be able to differentiate the two cases and give a
 *	   quantitative solution for the irqs that we can control, perhaps
 *	   based on get_cpu_iowait_time_us().
 *
 *	2. synchronization with other hw blocks
 *
 *
 */

#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/cpu.h>
#include <linux/thermal.h>
#include <linux/slab.h>
#include <linux/tick.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/sched/rt.h>

#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/mwait.h>
#include <asm/cpu_device_id.h>
#include <asm/idle.h>
#include <asm/hardirq.h>

#define MAX_TARGET_RATIO (50U)
/* For each undisturbed clamping period (no extra wake ups during idle time),
 * we increment the confidence counter for the given target ratio.
 * CONFIDENCE_OK defines the level where runtime calibration results are
 * valid.
 */
#define CONFIDENCE_OK (3)
/* Default idle injection duration; the driver adjusts sleep time to meet the
 * target idle ratio. Similar to frequency modulation.
 */
#define DEFAULT_DURATION_JIFFIES (6)

static unsigned int target_mwait;
static struct dentry *debug_dir;

/* user selected target */
static unsigned int set_target_ratio;
static unsigned int current_ratio;
static bool should_skip;
static bool reduce_irq;
static atomic_t idle_wakeup_counter;
static unsigned int control_cpu; /* The cpu assigned to collect stats and
				  * update control parameters. defaults to the
				  * BSP, but the BSP can be offlined.
				  */
static bool clamping;


static struct task_struct * __percpu *powerclamp_thread;
static struct thermal_cooling_device *cooling_dev;
static unsigned long *cpu_clamping_mask; /* bit map for tracking per cpu
					  * clamping threads
					  */

static unsigned int duration;
static unsigned int pkg_cstate_ratio_cur;
static unsigned int window_size;

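/*
 * Setter for the "duration" module parameter: out-of-range input is reported
 * with -EINVAL, and the stored value is clamped to the recommended 6-25ms
 * range before use.
 */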
static int duration_set(const char *arg, const struct kernel_param *kp)
{
	int ret = 0;
	unsigned long new_duration;

	ret = kstrtoul(arg, 10, &new_duration);
	if (ret)
		goto exit;
	if (new_duration > 25 || new_duration < 6) {
		pr_err("Out of recommended range %lu, between 6-25ms\n",
			new_duration);
		ret = -EINVAL;
	}

	duration = clamp(new_duration, 6ul, 25ul);
	smp_mb();

exit:

	return ret;
}

static struct kernel_param_ops duration_ops = {
	.set = duration_set,
	.get = param_get_int,
};


module_param_cb(duration, &duration_ops, &duration, 0644);
MODULE_PARM_DESC(duration, "forced idle time for each attempt in msec.");

struct powerclamp_calibration_data {
	unsigned long confidence; /* used for calibration, basically a counter
				   * that gets incremented each time a clamping
				   * period is completed without extra wakeups;
				   * once that counter reaches a given level,
				   * compensation is deemed usable.
				   */
	unsigned long steady_comp; /* steady state compensation used when
				    * no extra wakeups occurred.
				    */
	unsigned long dynamic_comp; /* compensates for excessive wakeups from
				     * idle, mostly from external interrupts.
				     */
};

static struct powerclamp_calibration_data cal_data[MAX_TARGET_RATIO];

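/*
 * Setter for the "window_size" module parameter: out-of-range input is
 * reported with -EINVAL, and the stored value is clamped to 2-10 clamping
 * cycles before use.
 */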
static int window_size_set(const char *arg, const struct kernel_param *kp)
{
	int ret = 0;
	unsigned long new_window_size;

	ret = kstrtoul(arg, 10, &new_window_size);
	if (ret)
		goto exit_win;
	if (new_window_size > 10 || new_window_size < 2) {
		pr_err("Out of recommended window size %lu, between 2-10\n",
			new_window_size);
		ret = -EINVAL;
	}

	window_size = clamp(new_window_size, 2ul, 10ul);
	smp_mb();

exit_win:

	return ret;
}

static struct kernel_param_ops window_size_ops = {
	.set = window_size_set,
	.get = param_get_int,
};

module_param_cb(window_size, &window_size_ops, &window_size, 0644);
MODULE_PARM_DESC(window_size, "sliding window in number of clamping cycles\n"
	"\tpowerclamp controls idle ratio within this window. a larger\n"
	"\twindow size results in slower response time but smoother\n"
	"\tclamping results. defaults to 2.");

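/*
 * Query CPUID for the deepest supported mwait C-state/sub-state and cache the
 * corresponding mwait hint in target_mwait for use during idle injection.
 */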
static void find_target_mwait(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int highest_cstate = 0;
	unsigned int highest_subcstate = 0;
	int i;

	if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
		return;

	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);

	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
	    !(ecx & CPUID5_ECX_INTERRUPT_BREAK))
		return;

	edx >>= MWAIT_SUBSTATE_SIZE;
	for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
		if (edx & MWAIT_SUBSTATE_MASK) {
			highest_cstate = i;
			highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
		}
	}
	target_mwait = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
		(highest_subcstate - 1);

}

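/*
 * Sum up all available package C-state residency counters. MSRs that fault
 * on the first read are remembered and skipped on subsequent calls.
 */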
static u64 pkg_state_counter(void)
{
	u64 val;
	u64 count = 0;

	static bool skip_c2;
	static bool skip_c3;
	static bool skip_c6;
	static bool skip_c7;

	if (!skip_c2) {
		if (!rdmsrl_safe(MSR_PKG_C2_RESIDENCY, &val))
			count += val;
		else
			skip_c2 = true;
	}

	if (!skip_c3) {
		if (!rdmsrl_safe(MSR_PKG_C3_RESIDENCY, &val))
			count += val;
		else
			skip_c3 = true;
	}

	if (!skip_c6) {
		if (!rdmsrl_safe(MSR_PKG_C6_RESIDENCY, &val))
			count += val;
		else
			skip_c6 = true;
	}

	if (!skip_c7) {
		if (!rdmsrl_safe(MSR_PKG_C7_RESIDENCY, &val))
			count += val;
		else
			skip_c7 = true;
	}

	return count;
}

static void noop_timer(unsigned long foo)
{
	/* empty... just the fact that we get the interrupt wakes us up */
}

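/*
 * Return the extra idle ratio to inject on top of the target, derived from
 * runtime calibration data. Compensation is only applied once the adjacent
 * calibration entries have reached the confidence threshold.
 */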
static unsigned int get_compensation(int ratio)
{
	unsigned int comp = 0;

	/* we only use compensation if all adjacent ones are good */
	if (ratio == 1 &&
		cal_data[ratio].confidence >= CONFIDENCE_OK &&
		cal_data[ratio + 1].confidence >= CONFIDENCE_OK &&
		cal_data[ratio + 2].confidence >= CONFIDENCE_OK) {
		comp = (cal_data[ratio].steady_comp +
			cal_data[ratio + 1].steady_comp +
			cal_data[ratio + 2].steady_comp) / 3;
	} else if (ratio == MAX_TARGET_RATIO - 1 &&
		cal_data[ratio].confidence >= CONFIDENCE_OK &&
		cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
		cal_data[ratio - 2].confidence >= CONFIDENCE_OK) {
		comp = (cal_data[ratio].steady_comp +
			cal_data[ratio - 1].steady_comp +
			cal_data[ratio - 2].steady_comp) / 3;
	} else if (cal_data[ratio].confidence >= CONFIDENCE_OK &&
		cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
		cal_data[ratio + 1].confidence >= CONFIDENCE_OK) {
		comp = (cal_data[ratio].steady_comp +
			cal_data[ratio - 1].steady_comp +
			cal_data[ratio + 1].steady_comp) / 3;
	}

	/* REVISIT: simple penalty of double idle injection */
	if (reduce_irq)
		comp = ratio;
	/* do not exceed limit */
	if (comp + ratio >= MAX_TARGET_RATIO)
		comp = MAX_TARGET_RATIO - ratio - 1;

	return comp;
}

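/*
 * Feed the measured idle ratio of the last window back into the calibration
 * data for the given target ratio.
 */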
static void adjust_compensation(int target_ratio, unsigned int win)
{
	int delta;
	struct powerclamp_calibration_data *d = &cal_data[target_ratio];

	/*
	 * skip adjustment if the confidence level has already been reached,
	 * or if there were too many wakeups during the last idle injection
	 * period; in that case we cannot trust the data for compensation.
	 */
	if (d->confidence >= CONFIDENCE_OK ||
		atomic_read(&idle_wakeup_counter) >
		win * num_online_cpus())
		return;

	delta = set_target_ratio - current_ratio;
	/* filter out bad data */
	if (delta >= 0 && delta <= (1+target_ratio/10)) {
		if (d->steady_comp)
			d->steady_comp =
				roundup(delta+d->steady_comp, 2)/2;
		else
			d->steady_comp = delta;
		d->confidence++;
	}
}

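/*
 * Compare package C-state residency against the TSC over the last window,
 * update calibration data and the reduce_irq flag, and return true if the
 * measured ratio already exceeds target + guard, i.e. the next injection
 * period can be skipped.
 */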
static bool powerclamp_adjust_controls(unsigned int target_ratio,
				unsigned int guard, unsigned int win)
{
	static u64 msr_last, tsc_last;
	u64 msr_now, tsc_now;
	u64 val64;

	/* check result for the last window */
	msr_now = pkg_state_counter();
	rdtscll(tsc_now);

	/* calculate pkg cstate vs tsc ratio */
	if (!msr_last || !tsc_last)
		current_ratio = 1;
	else if (tsc_now-tsc_last) {
		val64 = 100*(msr_now-msr_last);
		do_div(val64, (tsc_now-tsc_last));
		current_ratio = val64;
	}

	/* update record */
	msr_last = msr_now;
	tsc_last = tsc_now;

	adjust_compensation(target_ratio, win);
	/*
	 * too many external interrupts: set a flag so
	 * that we can take measures later.
	 */
	reduce_irq = atomic_read(&idle_wakeup_counter) >=
		2 * win * num_online_cpus();

	atomic_set(&idle_wakeup_counter, 0);
	/* if we are above target+guard, skip */
	return set_target_ratio + guard <= current_ratio;
}

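/*
 * Per-cpu kthread that performs the actual idle injection: it sleeps to align
 * with the other clamping threads, then mwaits with the tick stopped until
 * the injection period expires. One instance runs on each online cpu while
 * clamping is active.
 */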
static int clamp_thread(void *arg)
{
	int cpunr = (unsigned long)arg;
	DEFINE_TIMER(wakeup_timer, noop_timer, 0, 0);
	static const struct sched_param param = {
		.sched_priority = MAX_USER_RT_PRIO/2,
	};
	unsigned int count = 0;
	unsigned int target_ratio;

	set_bit(cpunr, cpu_clamping_mask);
	set_freezable();
	init_timer_on_stack(&wakeup_timer);
	sched_setscheduler(current, SCHED_FIFO, &param);

	while (true == clamping && !kthread_should_stop() &&
		cpu_online(cpunr)) {
		int sleeptime;
		unsigned long target_jiffies;
		unsigned int guard;
		unsigned int compensation = 0;
		int interval; /* jiffies to sleep for each attempt */
		unsigned int duration_jiffies = msecs_to_jiffies(duration);
		unsigned int window_size_now;

		try_to_freeze();
		/*
		 * make sure the user selected ratio does not take effect until
		 * the next round. adjust target_ratio if the user has changed
		 * the target, so that we can converge quickly.
		 */
		target_ratio = set_target_ratio;
		guard = 1 + target_ratio/20;
		window_size_now = window_size;
		count++;

		/*
		 * systems may differ in their ability to enter package level
		 * c-states, thus we need to compensate the injected idle ratio
		 * to achieve the actual target reported by the HW.
		 */
		compensation = get_compensation(target_ratio);
		interval = duration_jiffies*100/(target_ratio+compensation);

		/* align idle time */
		target_jiffies = roundup(jiffies, interval);
		sleeptime = target_jiffies - jiffies;
		if (sleeptime <= 0)
			sleeptime = 1;
		schedule_timeout_interruptible(sleeptime);
		/*
		 * only the elected controlling cpu can collect stats and
		 * update control parameters.
		 */
		if (cpunr == control_cpu && !(count%window_size_now)) {
			should_skip =
				powerclamp_adjust_controls(target_ratio,
							guard, window_size_now);
			smp_mb();
		}

		if (should_skip)
			continue;

		target_jiffies = jiffies + duration_jiffies;
		mod_timer(&wakeup_timer, target_jiffies);
		if (unlikely(local_softirq_pending()))
			continue;
		/*
		 * stop the tick during idle time; interrupts are still
		 * allowed, thus jiffies are updated properly.
		 */
		preempt_disable();
		tick_nohz_idle_enter();
		/* mwait until target jiffies is reached */
		while (time_before(jiffies, target_jiffies)) {
			unsigned long ecx = 1;
			unsigned long eax = target_mwait;

			/*
			 * REVISIT: may call enter_idle() to notify drivers who
			 * can save power during cpu idle. same for exit_idle()
			 */
			local_touch_nmi();
			stop_critical_timings();
			mwait_idle_with_hints(eax, ecx);
			start_critical_timings();
			atomic_inc(&idle_wakeup_counter);
		}
		tick_nohz_idle_exit();
		preempt_enable();
	}
	del_timer_sync(&wakeup_timer);
	clear_bit(cpunr, cpu_clamping_mask);

	return 0;
}

/*
 * 1 HZ polling while clamping is active, useful for userspace
 * to monitor actual idle ratio.
 */
static void poll_pkg_cstate(struct work_struct *dummy);
static DECLARE_DELAYED_WORK(poll_pkg_cstate_work, poll_pkg_cstate);
static void poll_pkg_cstate(struct work_struct *dummy)
{
	static u64 msr_last;
	static u64 tsc_last;
	static unsigned long jiffies_last;

	u64 msr_now;
	unsigned long jiffies_now;
	u64 tsc_now;
	u64 val64;

	msr_now = pkg_state_counter();
	rdtscll(tsc_now);
	jiffies_now = jiffies;

	/* calculate pkg cstate vs tsc ratio */
	if (!msr_last || !tsc_last)
		pkg_cstate_ratio_cur = 1;
	else {
		if (tsc_now - tsc_last) {
			val64 = 100 * (msr_now - msr_last);
			do_div(val64, (tsc_now - tsc_last));
			pkg_cstate_ratio_cur = val64;
		}
	}

	/* update record */
	msr_last = msr_now;
	jiffies_last = jiffies_now;
	tsc_last = tsc_now;

	if (true == clamping)
		schedule_delayed_work(&poll_pkg_cstate_work, HZ);
}

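/*
 * Start idle injection: verify that the package C-state counters work, pick
 * the controlling cpu, kick off the 1 HZ status poll and spawn one clamping
 * kthread per online cpu.
 */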
static int start_power_clamp(void)
{
	unsigned long cpu;
	struct task_struct *thread;

	/* check if pkg cstate counter is completely 0, abort in this case */
	if (!pkg_state_counter()) {
		pr_err("pkg cstate counter not functional, abort\n");
		return -EINVAL;
	}

	set_target_ratio = clamp(set_target_ratio, 0U, MAX_TARGET_RATIO - 1);
	/* prevent cpu hotplug */
	get_online_cpus();

	/* prefer BSP */
	control_cpu = 0;
	if (!cpu_online(control_cpu))
		control_cpu = smp_processor_id();

	clamping = true;
	schedule_delayed_work(&poll_pkg_cstate_work, 0);

	/* start one thread per online cpu */
	for_each_online_cpu(cpu) {
		struct task_struct **p =
			per_cpu_ptr(powerclamp_thread, cpu);

		thread = kthread_create_on_node(clamp_thread,
						(void *) cpu,
						cpu_to_node(cpu),
						"kidle_inject/%ld", cpu);
		/* bind to cpu here */
		if (likely(!IS_ERR(thread))) {
			kthread_bind(thread, cpu);
			wake_up_process(thread);
			*p = thread;
		}

	}
	put_online_cpus();

	return 0;
}

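/*
 * Stop idle injection: clear the clamping flag, give the per cpu kthreads a
 * moment to exit on their own, then stop any that are still running.
 */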
static void end_power_clamp(void)
{
	int i;
	struct task_struct *thread;

	clamping = false;
	/*
	 * make clamping visible to other cpus and give the per cpu clamping
	 * threads some time to exit, or they get killed later.
	 */
	smp_mb();
	msleep(20);
	if (bitmap_weight(cpu_clamping_mask, num_possible_cpus())) {
		for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) {
			pr_debug("clamping thread for cpu %d alive, kill\n", i);
			thread = *per_cpu_ptr(powerclamp_thread, i);
			kthread_stop(thread);
		}
	}
}

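/*
 * CPU hotplug notifier: start a clamping kthread on a newly onlined cpu
 * (preferring the BSP as controlling cpu), and stop the thread and/or move
 * the controlling cpu away when a cpu goes offline.
 */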
static int powerclamp_cpu_callback(struct notifier_block *nfb,
				unsigned long action, void *hcpu)
{
	unsigned long cpu = (unsigned long)hcpu;
	struct task_struct *thread;
	struct task_struct **percpu_thread =
		per_cpu_ptr(powerclamp_thread, cpu);

	if (false == clamping)
		goto exit_ok;

	switch (action) {
	case CPU_ONLINE:
		thread = kthread_create_on_node(clamp_thread,
						(void *) cpu,
						cpu_to_node(cpu),
						"kidle_inject/%lu", cpu);
		if (likely(!IS_ERR(thread))) {
			kthread_bind(thread, cpu);
			wake_up_process(thread);
			*percpu_thread = thread;
		}
		/* prefer BSP as controlling CPU */
		if (cpu == 0) {
			control_cpu = 0;
			smp_mb();
		}
		break;
	case CPU_DEAD:
		if (test_bit(cpu, cpu_clamping_mask)) {
			pr_err("cpu %lu dead but powerclamping thread is not\n",
				cpu);
			kthread_stop(*percpu_thread);
		}
		if (cpu == control_cpu) {
			control_cpu = smp_processor_id();
			smp_mb();
		}
	}

exit_ok:
	return NOTIFY_OK;
}

static struct notifier_block powerclamp_cpu_notifier = {
	.notifier_call = powerclamp_cpu_callback,
};

static int powerclamp_get_max_state(struct thermal_cooling_device *cdev,
				 unsigned long *state)
{
	*state = MAX_TARGET_RATIO;

	return 0;
}

static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev,
				 unsigned long *state)
{
	if (true == clamping)
		*state = pkg_cstate_ratio_cur;
	else
		/* to save power, do not poll idle ratio while not clamping */
		*state = -1; /* indicates invalid state */

	return 0;
}

static int powerclamp_set_cur_state(struct thermal_cooling_device *cdev,
				 unsigned long new_target_ratio)
{
	int ret = 0;

	new_target_ratio = clamp(new_target_ratio, 0UL,
				(unsigned long) (MAX_TARGET_RATIO-1));
	if (set_target_ratio == 0 && new_target_ratio > 0) {
		pr_info("Start idle injection to reduce power\n");
		set_target_ratio = new_target_ratio;
		ret = start_power_clamp();
		goto exit_set;
	} else if (set_target_ratio > 0 && new_target_ratio == 0) {
		pr_info("Stop forced idle injection\n");
		set_target_ratio = 0;
		end_power_clamp();
	} else /* adjust currently running */ {
		set_target_ratio = new_target_ratio;
		/* make new set_target_ratio visible to other cpus */
		smp_mb();
	}

exit_set:
	return ret;
}

/* bind to generic thermal layer as cooling device */
static struct thermal_cooling_device_ops powerclamp_cooling_ops = {
	.get_max_state = powerclamp_get_max_state,
	.get_cur_state = powerclamp_get_cur_state,
	.set_cur_state = powerclamp_set_cur_state,
};

/* runs on Nehalem and later */
static const struct x86_cpu_id intel_powerclamp_ids[] = {
	{ X86_VENDOR_INTEL, 6, 0x1a},
	{ X86_VENDOR_INTEL, 6, 0x1c},
	{ X86_VENDOR_INTEL, 6, 0x1e},
	{ X86_VENDOR_INTEL, 6, 0x1f},
	{ X86_VENDOR_INTEL, 6, 0x25},
	{ X86_VENDOR_INTEL, 6, 0x26},
	{ X86_VENDOR_INTEL, 6, 0x2a},
	{ X86_VENDOR_INTEL, 6, 0x2c},
	{ X86_VENDOR_INTEL, 6, 0x2d},
	{ X86_VENDOR_INTEL, 6, 0x2e},
	{ X86_VENDOR_INTEL, 6, 0x2f},
	{ X86_VENDOR_INTEL, 6, 0x3a},
	{ X86_VENDOR_INTEL, 6, 0x3c},
	{ X86_VENDOR_INTEL, 6, 0x3e},
	{ X86_VENDOR_INTEL, 6, 0x3f},
	{ X86_VENDOR_INTEL, 6, 0x45},
	{ X86_VENDOR_INTEL, 6, 0x46},
	{}
};
MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids);

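/*
 * Check that the CPU is in the supported model list and has the required
 * features (constant nonstop TSC, MWAIT, ARAT), then determine the deepest
 * mwait hint to use for idle injection.
 */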
static int powerclamp_probe(void)
{
	if (!x86_match_cpu(intel_powerclamp_ids)) {
		pr_err("Intel powerclamp does not run on family %d model %d\n",
				boot_cpu_data.x86, boot_cpu_data.x86_model);
		return -ENODEV;
	}
	if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC) ||
		!boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ||
		!boot_cpu_has(X86_FEATURE_MWAIT) ||
		!boot_cpu_has(X86_FEATURE_ARAT))
		return -ENODEV;

	/* find the deepest mwait value */
	find_target_mwait();

	return 0;
}

static int powerclamp_debug_show(struct seq_file *m, void *unused)
{
	int i = 0;

	seq_printf(m, "controlling cpu: %d\n", control_cpu);
	seq_printf(m, "pct confidence steady dynamic (compensation)\n");
	for (i = 0; i < MAX_TARGET_RATIO; i++) {
		seq_printf(m, "%d\t%lu\t%lu\t%lu\n",
			i,
			cal_data[i].confidence,
			cal_data[i].steady_comp,
			cal_data[i].dynamic_comp);
	}

	return 0;
}

static int powerclamp_debug_open(struct inode *inode,
			struct file *file)
{
	return single_open(file, powerclamp_debug_show, inode->i_private);
}

static const struct file_operations powerclamp_debug_fops = {
	.open		= powerclamp_debug_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
	.owner		= THIS_MODULE,
};

static inline void powerclamp_create_debug_files(void)
{
	debug_dir = debugfs_create_dir("intel_powerclamp", NULL);
	if (!debug_dir)
		return;

	if (!debugfs_create_file("powerclamp_calib", S_IRUGO, debug_dir,
				cal_data, &powerclamp_debug_fops))
		goto file_error;

	return;

file_error:
	debugfs_remove_recursive(debug_dir);
}

static int powerclamp_init(void)
{
	int retval;
	int bitmap_size;

	bitmap_size = BITS_TO_LONGS(num_possible_cpus()) * sizeof(long);
	cpu_clamping_mask = kzalloc(bitmap_size, GFP_KERNEL);
	if (!cpu_clamping_mask)
		return -ENOMEM;

	/* probe cpu features and ids here */
	retval = powerclamp_probe();
	if (retval)
		goto exit_free;

	/* set default limit, maybe adjusted during runtime based on feedback */
	window_size = 2;
	register_hotcpu_notifier(&powerclamp_cpu_notifier);

	powerclamp_thread = alloc_percpu(struct task_struct *);
	if (!powerclamp_thread) {
		retval = -ENOMEM;
		goto exit_unregister;
	}

	cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL,
						&powerclamp_cooling_ops);
	if (IS_ERR(cooling_dev)) {
		retval = -ENODEV;
		goto exit_free_thread;
	}

	if (!duration)
		duration = jiffies_to_msecs(DEFAULT_DURATION_JIFFIES);

	powerclamp_create_debug_files();

	return 0;

exit_free_thread:
	free_percpu(powerclamp_thread);
exit_unregister:
	unregister_hotcpu_notifier(&powerclamp_cpu_notifier);
exit_free:
	kfree(cpu_clamping_mask);
	return retval;
}
module_init(powerclamp_init);

static void powerclamp_exit(void)
{
	unregister_hotcpu_notifier(&powerclamp_cpu_notifier);
	end_power_clamp();
	free_percpu(powerclamp_thread);
	thermal_cooling_device_unregister(cooling_dev);
	kfree(cpu_clamping_mask);

	cancel_delayed_work_sync(&poll_pkg_cstate_work);
	debugfs_remove_recursive(debug_dir);
}
module_exit(powerclamp_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@linux.intel.com>");
MODULE_DESCRIPTION("Package Level C-state Idle Injection for Intel CPUs");