Isaku Yamahata | dcbbecd | 2008-10-17 11:18:08 +0900 | [diff] [blame] | 1 | /****************************************************************************** |
| 2 | * arch/ia64/xen/time.c |
| 3 | * |
| 4 | * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp> |
| 5 | * VA Linux Systems Japan K.K. |
| 6 | * |
| 7 | * This program is free software; you can redistribute it and/or modify |
| 8 | * it under the terms of the GNU General Public License as published by |
| 9 | * the Free Software Foundation; either version 2 of the License, or |
| 10 | * (at your option) any later version. |
| 11 | * |
| 12 | * This program is distributed in the hope that it will be useful, |
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 15 | * GNU General Public License for more details. |
| 16 | * |
| 17 | * You should have received a copy of the GNU General Public License |
| 18 | * along with this program; if not, write to the Free Software |
| 19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
| 20 | * |
| 21 | */ |
| 22 | |
| 23 | #include <linux/delay.h> |
| 24 | #include <linux/kernel_stat.h> |
| 25 | #include <linux/posix-timers.h> |
| 26 | #include <linux/irq.h> |
| 27 | #include <linux/clocksource.h> |
| 28 | |
Isaku Yamahata | 91834e6 | 2008-10-17 11:18:10 +0900 | [diff] [blame] | 29 | #include <asm/timex.h> |
| 30 | |
Isaku Yamahata | dcbbecd | 2008-10-17 11:18:08 +0900 | [diff] [blame] | 31 | #include <asm/xen/hypervisor.h> |
| 32 | |
| 33 | #include <xen/interface/vcpu.h> |
| 34 | |
| 35 | #include "../kernel/fsyscall_gtod_data.h" |
| 36 | |
| 37 | DEFINE_PER_CPU(struct vcpu_runstate_info, runstate); |
| 38 | DEFINE_PER_CPU(unsigned long, processed_stolen_time); |
| 39 | DEFINE_PER_CPU(unsigned long, processed_blocked_time); |
| 40 | |
| 41 | /* taken from i386/kernel/time-xen.c */ |
| 42 | static void xen_init_missing_ticks_accounting(int cpu) |
| 43 | { |
| 44 | struct vcpu_register_runstate_memory_area area; |
| 45 | struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu); |
| 46 | int rc; |
| 47 | |
| 48 | memset(runstate, 0, sizeof(*runstate)); |
| 49 | |
| 50 | area.addr.v = runstate; |
| 51 | rc = HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu, |
| 52 | &area); |
| 53 | WARN_ON(rc && rc != -ENOSYS); |
| 54 | |
| 55 | per_cpu(processed_blocked_time, cpu) = runstate->time[RUNSTATE_blocked]; |
| 56 | per_cpu(processed_stolen_time, cpu) = runstate->time[RUNSTATE_runnable] |
| 57 | + runstate->time[RUNSTATE_offline]; |
| 58 | } |
| 59 | |
| 60 | /* |
| 61 | * Runstate accounting |
| 62 | */ |
| 63 | /* stolen from arch/x86/xen/time.c */ |
| 64 | static void get_runstate_snapshot(struct vcpu_runstate_info *res) |
| 65 | { |
| 66 | u64 state_time; |
| 67 | struct vcpu_runstate_info *state; |
| 68 | |
| 69 | BUG_ON(preemptible()); |
| 70 | |
| 71 | state = &__get_cpu_var(runstate); |
| 72 | |
| 73 | /* |
| 74 | * The runstate info is always updated by the hypervisor on |
| 75 | * the current CPU, so there's no need to use anything |
| 76 | * stronger than a compiler barrier when fetching it. |
| 77 | */ |
| 78 | do { |
| 79 | state_time = state->state_entry_time; |
| 80 | rmb(); |
| 81 | *res = *state; |
| 82 | rmb(); |
| 83 | } while (state->state_entry_time != state_time); |
| 84 | } |
| 85 | |
| 86 | #define NS_PER_TICK (1000000000LL/HZ) |
| 87 | |
| 88 | static unsigned long |
| 89 | consider_steal_time(unsigned long new_itm) |
| 90 | { |
| 91 | unsigned long stolen, blocked; |
| 92 | unsigned long delta_itm = 0, stolentick = 0; |
| 93 | int cpu = smp_processor_id(); |
| 94 | struct vcpu_runstate_info runstate; |
| 95 | struct task_struct *p = current; |
| 96 | |
| 97 | get_runstate_snapshot(&runstate); |
| 98 | |
| 99 | /* |
| 100 | * Check for vcpu migration effect |
| 101 | * In this case, itc value is reversed. |
| 102 | * This causes huge stolen value. |
| 103 | * This function just checks and reject this effect. |
| 104 | */ |
| 105 | if (!time_after_eq(runstate.time[RUNSTATE_blocked], |
| 106 | per_cpu(processed_blocked_time, cpu))) |
| 107 | blocked = 0; |
| 108 | |
| 109 | if (!time_after_eq(runstate.time[RUNSTATE_runnable] + |
| 110 | runstate.time[RUNSTATE_offline], |
| 111 | per_cpu(processed_stolen_time, cpu))) |
| 112 | stolen = 0; |
| 113 | |
| 114 | if (!time_after(delta_itm + new_itm, ia64_get_itc())) |
| 115 | stolentick = ia64_get_itc() - new_itm; |
| 116 | |
| 117 | do_div(stolentick, NS_PER_TICK); |
| 118 | stolentick++; |
| 119 | |
| 120 | do_div(stolen, NS_PER_TICK); |
| 121 | |
| 122 | if (stolen > stolentick) |
| 123 | stolen = stolentick; |
| 124 | |
| 125 | stolentick -= stolen; |
| 126 | do_div(blocked, NS_PER_TICK); |
| 127 | |
| 128 | if (blocked > stolentick) |
| 129 | blocked = stolentick; |
| 130 | |
| 131 | if (stolen > 0 || blocked > 0) { |
Isaku Yamahata | 7a0b6e0 | 2009-01-15 15:16:55 +0900 | [diff] [blame] | 132 | account_steal_ticks(stolen); |
| 133 | account_idle_ticks(blocked); |
Isaku Yamahata | dcbbecd | 2008-10-17 11:18:08 +0900 | [diff] [blame] | 134 | run_local_timers(); |
| 135 | |
Paul E. McKenney | a157229 | 2009-08-22 13:56:51 -0700 | [diff] [blame] | 136 | rcu_check_callbacks(cpu, user_mode(get_irq_regs())); |
Isaku Yamahata | dcbbecd | 2008-10-17 11:18:08 +0900 | [diff] [blame] | 137 | |
| 138 | scheduler_tick(); |
| 139 | run_posix_cpu_timers(p); |
| 140 | delta_itm += local_cpu_data->itm_delta * (stolen + blocked); |
| 141 | |
| 142 | if (cpu == time_keeper_id) { |
| 143 | write_seqlock(&xtime_lock); |
| 144 | do_timer(stolen + blocked); |
| 145 | local_cpu_data->itm_next = delta_itm + new_itm; |
| 146 | write_sequnlock(&xtime_lock); |
| 147 | } else { |
| 148 | local_cpu_data->itm_next = delta_itm + new_itm; |
| 149 | } |
| 150 | per_cpu(processed_stolen_time, cpu) += NS_PER_TICK * stolen; |
| 151 | per_cpu(processed_blocked_time, cpu) += NS_PER_TICK * blocked; |
| 152 | } |
| 153 | return delta_itm; |
| 154 | } |
| 155 | |
/*
 * pv_time_ops hook: run steal-time accounting and advance *@new_itm by
 * the amount that was accounted.
 *
 * Returns 1 when something was accounted and the advanced *@new_itm is
 * still ahead of the current ITC, 0 otherwise.
 */
static int xen_do_steal_accounting(unsigned long *new_itm)
{
	unsigned long advance = consider_steal_time(*new_itm);

	*new_itm += advance;

	return (time_after(*new_itm, ia64_get_itc()) && advance) ? 1 : 0;
}
| 166 | |
| 167 | static void xen_itc_jitter_data_reset(void) |
| 168 | { |
| 169 | u64 lcycle, ret; |
| 170 | |
| 171 | do { |
| 172 | lcycle = itc_jitter_data.itc_lastcycle; |
| 173 | ret = cmpxchg(&itc_jitter_data.itc_lastcycle, lcycle, 0); |
| 174 | } while (unlikely(ret != lcycle)); |
| 175 | } |
| 176 | |
Isaku Yamahata | 1aec1c5 | 2009-03-04 21:05:41 +0900 | [diff] [blame] | 177 | /* based on xen_sched_clock() in arch/x86/xen/time.c. */ |
| 178 | /* |
| 179 | * This relies on HAVE_UNSTABLE_SCHED_CLOCK. If it can't be defined, |
| 180 | * something similar logic should be implemented here. |
| 181 | */ |
| 182 | /* |
| 183 | * Xen sched_clock implementation. Returns the number of unstolen |
| 184 | * nanoseconds, which is nanoseconds the VCPU spent in RUNNING+BLOCKED |
| 185 | * states. |
| 186 | */ |
| 187 | static unsigned long long xen_sched_clock(void) |
| 188 | { |
| 189 | struct vcpu_runstate_info runstate; |
| 190 | |
| 191 | unsigned long long now; |
| 192 | unsigned long long offset; |
| 193 | unsigned long long ret; |
| 194 | |
| 195 | /* |
| 196 | * Ideally sched_clock should be called on a per-cpu basis |
| 197 | * anyway, so preempt should already be disabled, but that's |
| 198 | * not current practice at the moment. |
| 199 | */ |
| 200 | preempt_disable(); |
| 201 | |
| 202 | /* |
| 203 | * both ia64_native_sched_clock() and xen's runstate are |
| 204 | * based on mAR.ITC. So difference of them makes sense. |
| 205 | */ |
| 206 | now = ia64_native_sched_clock(); |
| 207 | |
| 208 | get_runstate_snapshot(&runstate); |
| 209 | |
| 210 | WARN_ON(runstate.state != RUNSTATE_running); |
| 211 | |
| 212 | offset = 0; |
| 213 | if (now > runstate.state_entry_time) |
| 214 | offset = now - runstate.state_entry_time; |
| 215 | ret = runstate.time[RUNSTATE_blocked] + |
| 216 | runstate.time[RUNSTATE_running] + |
| 217 | offset; |
| 218 | |
| 219 | preempt_enable(); |
| 220 | |
| 221 | return ret; |
| 222 | } |
| 223 | |
Isaku Yamahata | dcbbecd | 2008-10-17 11:18:08 +0900 | [diff] [blame] | 224 | struct pv_time_ops xen_time_ops __initdata = { |
| 225 | .init_missing_ticks_accounting = xen_init_missing_ticks_accounting, |
| 226 | .do_steal_accounting = xen_do_steal_accounting, |
| 227 | .clocksource_resume = xen_itc_jitter_data_reset, |
Isaku Yamahata | 1aec1c5 | 2009-03-04 21:05:41 +0900 | [diff] [blame] | 228 | .sched_clock = xen_sched_clock, |
Isaku Yamahata | dcbbecd | 2008-10-17 11:18:08 +0900 | [diff] [blame] | 229 | }; |
Isaku Yamahata | 91834e6 | 2008-10-17 11:18:10 +0900 | [diff] [blame] | 230 | |
/*
 * Called after suspend, to resume time on the calling CPU: trigger a
 * local tick and touch the softlockup watchdog.
 */
static void xen_local_tick_resume(void)
{
	ia64_cpu_local_tick();
	touch_softlockup_watchdog();
}
| 238 | |
| 239 | void |
| 240 | xen_timer_resume(void) |
| 241 | { |
| 242 | unsigned int cpu; |
| 243 | |
| 244 | xen_local_tick_resume(); |
| 245 | |
| 246 | for_each_online_cpu(cpu) |
| 247 | xen_init_missing_ticks_accounting(cpu); |
| 248 | } |
| 249 | |
/*
 * smp_call_function() callback: resume the local tick and re-initialize
 * the missing ticks accounting for the CPU it runs on.  @unused is ignored.
 */
static void ia64_cpu_local_tick_fn(void *unused)
{
	xen_local_tick_resume();
	xen_init_missing_ticks_accounting(smp_processor_id());
}
| 255 | |
| 256 | void |
| 257 | xen_timer_resume_on_aps(void) |
| 258 | { |
| 259 | smp_call_function(&ia64_cpu_local_tick_fn, NULL, 1); |
| 260 | } |