/*
 * Copyright 2006 Andi Kleen, SUSE Labs.
 * Subject to the GNU Public License, v.2
 *
 * Fast user context implementation of clock_gettime, gettimeofday, and time.
 *
 * 32-bit compat layer by Stefani Seibold <stefani@seibold.net>
 *  sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
 *
 * The code should have no internal unresolved relocations.
 * Check with readelf after changing.
 */

#include <uapi/linux/time.h>
#include <asm/vgtod.h>
#include <asm/hpet.h>
#include <asm/vvar.h>
#include <asm/unistd.h>
#include <asm/msr.h>
#include <linux/math64.h>
#include <linux/time.h>

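/*
 * gtod resolves to the kernel's vsyscall_gtod_data copy in the vvar
 * page, which is mapped read-only into every process.
 */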
#define gtod (&VVAR(vsyscall_gtod_data))

extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
extern time_t __vdso_time(time_t *t);

#ifdef CONFIG_HPET_TIMER
extern u8 hpet_page
	__attribute__((visibility("hidden")));

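/* Read the HPET main counter register via the hidden hpet_page mapping. */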
static notrace cycle_t vread_hpet(void)
{
	return *(const volatile u32 *)(&hpet_page + HPET_COUNTER);
}
#endif

#ifndef BUILD_VDSO32

#include <linux/kernel.h>
#include <asm/vsyscall.h>
#include <asm/fixmap.h>
#include <asm/pvclock.h>

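/*
 * Fallbacks: issue the real system call when no vDSO clocksource is
 * usable.  On 64 bit this is a plain SYSCALL with the call number in
 * rax and the arguments in rdi/rsi.
 */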
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
	long ret;
	asm("syscall" : "=a" (ret) :
	    "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory");
	return ret;
}

notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
	long ret;

	asm("syscall" : "=a" (ret) :
	    "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
	return ret;
}

#ifdef CONFIG_PARAVIRT_CLOCK

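/*
 * The per-vCPU pvclock time info pages are mapped via fixmaps; each
 * page holds PAGE_SIZE/PVTI_SIZE entries, so a cpu number is split
 * into a fixmap page index and an offset within that page.
 */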
static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu)
{
	const struct pvclock_vsyscall_time_info *pvti_base;
	int idx = cpu / (PAGE_SIZE/PVTI_SIZE);
	int offset = cpu % (PAGE_SIZE/PVTI_SIZE);

	BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END);

	pvti_base = (struct pvclock_vsyscall_time_info *)
		    __fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx);

	return &pvti_base[offset];
}

static notrace cycle_t vread_pvclock(int *mode)
{
	const struct pvclock_vcpu_time_info *pvti = &get_pvti(0)->pvti;
	cycle_t ret;
	u64 tsc, pvti_tsc;
	u64 last, delta, pvti_system_time;
	u32 version, pvti_tsc_to_system_mul, pvti_tsc_shift;

	/*
	 * Note: The kernel and hypervisor must guarantee that cpu ID
	 * number maps 1:1 to per-CPU pvclock time info.
	 *
	 * Because the hypervisor is entirely unaware of guest userspace
	 * preemption, it cannot guarantee that per-CPU pvclock time
	 * info is updated if the underlying CPU changes or that that
	 * version is increased whenever the underlying CPU changes.
	 *
	 * On KVM, we are guaranteed that pvti updates for any vCPU are
	 * atomic as seen by *all* vCPUs.  This is an even stronger
	 * guarantee than we get with a normal seqlock.
	 *
	 * On Xen, we don't appear to have that guarantee, but Xen still
	 * supplies a valid seqlock using the version field.
	 *
	 * We only do pvclock vdso timing at all if
	 * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
	 * mean that all vCPUs have matching pvti and that the TSC is
	 * synced, so we can just look at vCPU 0's pvti.
	 */

	if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) {
		*mode = VCLOCK_NONE;
		return 0;
	}

	do {
		version = pvti->version;

		/* This is also a read barrier, so we'll read version first. */
		tsc = rdtsc_ordered();

		pvti_tsc_to_system_mul = pvti->tsc_to_system_mul;
		pvti_tsc_shift = pvti->tsc_shift;
		pvti_system_time = pvti->system_time;
		pvti_tsc = pvti->tsc_timestamp;

		/* Make sure that the version double-check is last. */
		smp_rmb();
	} while (unlikely((version & 1) || version != pvti->version));

	delta = tsc - pvti_tsc;
	ret = pvti_system_time +
		pvclock_scale_delta(delta, pvti_tsc_to_system_mul,
				    pvti_tsc_shift);

	/* refer to tsc.c read_tsc() comment for rationale */
	last = gtod->cycle_last;

	if (likely(ret >= last))
		return ret;

	return last;
}
#endif

#else

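/*
 * 32-bit fallbacks.  The vDSO is built as position-independent code,
 * where %ebx holds the GOT pointer, so the argument that the kernel
 * expects in %ebx is loaded by hand and the register is restored
 * afterwards (stashed in %edx, which is in the clobber list anyway).
 */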
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
	long ret;

	asm(
		"mov %%ebx, %%edx \n"
		"mov %2, %%ebx \n"
		"call __kernel_vsyscall \n"
		"mov %%edx, %%ebx \n"
		: "=a" (ret)
		: "0" (__NR_clock_gettime), "g" (clock), "c" (ts)
		: "memory", "edx");
	return ret;
}

notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
	long ret;

	asm(
		"mov %%ebx, %%edx \n"
		"mov %2, %%ebx \n"
		"call __kernel_vsyscall \n"
		"mov %%edx, %%ebx \n"
		: "=a" (ret)
		: "0" (__NR_gettimeofday), "g" (tv), "c" (tz)
		: "memory", "edx");
	return ret;
}

#ifdef CONFIG_PARAVIRT_CLOCK

static notrace cycle_t vread_pvclock(int *mode)
{
	*mode = VCLOCK_NONE;
	return 0;
}
#endif

#endif

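/* Ordered TSC read, clamped to cycle_last so the clock never goes backwards. */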
notrace static cycle_t vread_tsc(void)
{
	cycle_t ret = (cycle_t)rdtsc_ordered();
	u64 last = gtod->cycle_last;

	if (likely(ret >= last))
		return ret;

	/*
	 * GCC likes to generate cmov here, but this branch is extremely
	 * predictable (it's just a function of time and the likely is
	 * very likely) and there's a data dependence, so force GCC
	 * to generate a branch instead.  I don't barrier() because
	 * we don't actually need a barrier, and if this function
	 * ever gets inlined it will generate worse code.
	 */
	asm volatile ("");
	return last;
}

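/*
 * Read the active clocksource and convert the cycles elapsed since
 * cycle_last into shifted nanoseconds (callers apply gtod->shift).
 */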
notrace static inline u64 vgetsns(int *mode)
{
	u64 v;
	cycles_t cycles;

	if (gtod->vclock_mode == VCLOCK_TSC)
		cycles = vread_tsc();
#ifdef CONFIG_HPET_TIMER
	else if (gtod->vclock_mode == VCLOCK_HPET)
		cycles = vread_hpet();
#endif
#ifdef CONFIG_PARAVIRT_CLOCK
	else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
		cycles = vread_pvclock(mode);
#endif
	else
		return 0;
	v = (cycles - gtod->cycle_last) & gtod->mask;
	return v * gtod->mult;
}

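/*
 * The do_*() helpers below spin on the gtod seqcount until they have
 * read a consistent snapshot of the timekeeping data.
 */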
/* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
notrace static int __always_inline do_realtime(struct timespec *ts)
{
	unsigned long seq;
	u64 ns;
	int mode;

	do {
		seq = gtod_read_begin(gtod);
		mode = gtod->vclock_mode;
		ts->tv_sec = gtod->wall_time_sec;
		ns = gtod->wall_time_snsec;
		ns += vgetsns(&mode);
		ns >>= gtod->shift;
	} while (unlikely(gtod_read_retry(gtod, seq)));

	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
	ts->tv_nsec = ns;

	return mode;
}

notrace static int __always_inline do_monotonic(struct timespec *ts)
{
	unsigned long seq;
	u64 ns;
	int mode;

	do {
		seq = gtod_read_begin(gtod);
		mode = gtod->vclock_mode;
		ts->tv_sec = gtod->monotonic_time_sec;
		ns = gtod->monotonic_time_snsec;
		ns += vgetsns(&mode);
		ns >>= gtod->shift;
	} while (unlikely(gtod_read_retry(gtod, seq)));

	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
	ts->tv_nsec = ns;

	return mode;
}

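/*
 * The coarse clocks return the timestamp recorded at the last timer
 * tick; no hardware counter is read, so no fallback is ever needed.
 */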
notrace static void do_realtime_coarse(struct timespec *ts)
{
	unsigned long seq;
	do {
		seq = gtod_read_begin(gtod);
		ts->tv_sec = gtod->wall_time_coarse_sec;
		ts->tv_nsec = gtod->wall_time_coarse_nsec;
	} while (unlikely(gtod_read_retry(gtod, seq)));
}

notrace static void do_monotonic_coarse(struct timespec *ts)
{
	unsigned long seq;
	do {
		seq = gtod_read_begin(gtod);
		ts->tv_sec = gtod->monotonic_time_coarse_sec;
		ts->tv_nsec = gtod->monotonic_time_coarse_nsec;
	} while (unlikely(gtod_read_retry(gtod, seq)));
}

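/*
 * Entry points.  Each returns 0 on success and falls back to the real
 * system call when the clock is unsupported or no vDSO clocksource is
 * available (do_realtime/do_monotonic return VCLOCK_NONE).
 */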
notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
{
	switch (clock) {
	case CLOCK_REALTIME:
		if (do_realtime(ts) == VCLOCK_NONE)
			goto fallback;
		break;
	case CLOCK_MONOTONIC:
		if (do_monotonic(ts) == VCLOCK_NONE)
			goto fallback;
		break;
	case CLOCK_REALTIME_COARSE:
		do_realtime_coarse(ts);
		break;
	case CLOCK_MONOTONIC_COARSE:
		do_monotonic_coarse(ts);
		break;
	default:
		goto fallback;
	}

	return 0;
fallback:
	return vdso_fallback_gettime(clock, ts);
}
int clock_gettime(clockid_t, struct timespec *)
	__attribute__((weak, alias("__vdso_clock_gettime")));

notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
{
	if (likely(tv != NULL)) {
		if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE))
			return vdso_fallback_gtod(tv, tz);
		tv->tv_usec /= 1000;
	}
	if (unlikely(tz != NULL)) {
		tz->tz_minuteswest = gtod->tz_minuteswest;
		tz->tz_dsttime = gtod->tz_dsttime;
	}

	return 0;
}
int gettimeofday(struct timeval *, struct timezone *)
	__attribute__((weak, alias("__vdso_gettimeofday")));

/*
 * This will break when the xtime seconds get inaccurate, but that is
 * unlikely.
 */
notrace time_t __vdso_time(time_t *t)
{
	/* This is atomic on x86 so we don't need any locks. */
	time_t result = ACCESS_ONCE(gtod->wall_time_sec);

	if (t)
		*t = result;
	return result;
}
int time(time_t *t)
	__attribute__((weak, alias("__vdso_time")));
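
/*
 * Usage, for illustration only (not part of this file): userspace does
 * not normally call these entry points by name.  A libc that supports
 * the vDSO (e.g. glibc on x86) resolves clock_gettime() through the
 * vDSO image the kernel maps into each process, so an ordinary call
 * already takes the fast path above:
 *
 *	#include <stdio.h>
 *	#include <time.h>
 *
 *	int main(void)
 *	{
 *		struct timespec ts;
 *
 *		if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0)
 *			printf("%ld.%09ld\n", (long)ts.tv_sec, ts.tv_nsec);
 *		return 0;
 *	}
 */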