/*
 * Hibernate support specific for ARM64
 *
 * Derived from work on ARM hibernation support by:
 *
 * Ubuntu project, hibernation support for mach-dove
 * Copyright (C) 2010 Nokia Corporation (Hiroshi Doyu)
 * Copyright (C) 2010 Texas Instruments, Inc. (Teerth Reddy et al.)
 * https://lkml.org/lkml/2010/6/18/4
 * https://lists.linux-foundation.org/pipermail/linux-pm/2010-June/027422.html
 * https://patchwork.kernel.org/patch/96442/
 *
 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
 *
 * License terms: GNU General Public License (GPL) version 2
 */
#define pr_fmt(x) "hibernate: " x
#include <linux/kvm_host.h>
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/pm.h>
#include <linux/sched.h>
#include <linux/suspend.h>
#include <linux/utsname.h>
#include <linux/version.h>

#include <asm/barrier.h>
#include <asm/cacheflush.h>
#include <asm/irqflags.h>
#include <asm/memory.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/pgtable-hwdef.h>
#include <asm/sections.h>
#include <asm/smp.h>
#include <asm/suspend.h>
#include <asm/sysreg.h>
#include <asm/virt.h>

/*
 * Hibernate core relies on this value being 0 on resume, and marks it
 * __nosavedata assuming it will keep the resume kernel's '0' value. This
 * doesn't happen when KASLR is in use, as the two kernels may not be
 * located at the same address.
 *
 * defined as "__visible int in_suspend __nosavedata" in
 * kernel/power/hibernate.c
 */
extern int in_suspend;

/* Find a symbol's alias in the linear map */
#define LMADDR(x)	phys_to_virt(virt_to_phys(x))

/*
 * Do we need to reset el2? We do if we booted at EL2 but dropped to EL1
 * (i.e. a non-VHE kernel), as EL2 must then be re-configured on resume.
 */
#define el2_reset_needed()	(is_hyp_mode_available() && !is_kernel_in_hyp_mode())

/*
 * Start/end of the hibernate exit code; this must be copied to a 'safe'
 * location in memory, and executed from there.
 */
extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];

/* temporary el2 vectors in the __hibernate_exit_text section. */
extern char hibernate_el2_vectors[];

/* hyp-stub vectors, used to restore el2 during resume from hibernate. */
extern char __hyp_stub_vectors[];

/*
 * Values that may not change over hibernate/resume. We put the build number
 * and date in here so that we guarantee not to resume with a different
 * kernel.
 */
struct arch_hibernate_hdr_invariants {
	char	uts_version[__NEW_UTS_LEN + 1];
};

/* These values need to be known across a hibernate/restore. */
static struct arch_hibernate_hdr {
	struct arch_hibernate_hdr_invariants invariants;

	/* These are needed to find the relocated kernel if built with kaslr */
	phys_addr_t	ttbr1_el1;
	void		(*reenter_kernel)(void);

	/*
	 * We need to know where the __hyp_stub_vectors are after restore to
	 * re-configure el2.
	 */
	phys_addr_t	__hyp_stub_vectors;
} resume_hdr;

static inline void arch_hdr_invariants(struct arch_hibernate_hdr_invariants *i)
{
	memset(i, 0, sizeof(*i));
	memcpy(i->uts_version, init_utsname()->version, sizeof(i->uts_version));
}

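/*
 * Tell the hibernate core which pfns belong to the kernel's .nosave
 * section, so that they are left out of the hibernate image.
 */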
int pfn_is_nosave(unsigned long pfn)
{
	unsigned long nosave_begin_pfn = virt_to_pfn(&__nosave_begin);
	unsigned long nosave_end_pfn = virt_to_pfn(&__nosave_end - 1);

	return (pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn);
}

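/*
 * The CPU context that matters is captured by __cpu_suspend_enter() from
 * swsusp_arch_suspend(); all that is left to do here is check that the
 * secondary CPUs are offline.
 */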
void notrace save_processor_state(void)
{
	WARN_ON(num_online_cpus() != 1);
}

void notrace restore_processor_state(void)
{
}

int arch_hibernation_header_save(void *addr, unsigned int max_size)
{
	struct arch_hibernate_hdr *hdr = addr;

	if (max_size < sizeof(*hdr))
		return -EOVERFLOW;

	arch_hdr_invariants(&hdr->invariants);
	hdr->ttbr1_el1 = virt_to_phys(swapper_pg_dir);
	hdr->reenter_kernel = _cpu_resume;

	/* We can't use __hyp_get_vectors() because kvm may still be loaded */
	if (el2_reset_needed())
		hdr->__hyp_stub_vectors = virt_to_phys(__hyp_stub_vectors);
	else
		hdr->__hyp_stub_vectors = 0;

	return 0;
}
EXPORT_SYMBOL(arch_hibernation_header_save);

int arch_hibernation_header_restore(void *addr)
{
	struct arch_hibernate_hdr_invariants invariants;
	struct arch_hibernate_hdr *hdr = addr;

	arch_hdr_invariants(&invariants);
	if (memcmp(&hdr->invariants, &invariants, sizeof(invariants))) {
		pr_crit("Hibernate image not generated by this kernel!\n");
		return -EINVAL;
	}

	resume_hdr = *hdr;

	return 0;
}
EXPORT_SYMBOL(arch_hibernation_header_restore);

/*
 * Copies length bytes, starting at src_start, into a new page, performs
 * cache maintenance, then maps the page at the specified low address as
 * executable.
 *
 * This is used by hibernate to copy the code it needs to execute when
 * overwriting the kernel text. This function generates a new set of page
 * tables, which it loads into ttbr0.
 *
 * Length is provided as we probably only want 4K of data, even on a 64K
 * page system.
 */
static int create_safe_exec_page(void *src_start, size_t length,
				 unsigned long dst_addr,
				 phys_addr_t *phys_dst_addr,
				 void *(*allocator)(gfp_t mask),
				 gfp_t mask)
{
	int rc = 0;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	unsigned long dst = (unsigned long)allocator(mask);

	if (!dst) {
		rc = -ENOMEM;
		goto out;
	}

	memcpy((void *)dst, src_start, length);
	flush_icache_range(dst, dst + length);

	/* Root of the new ttbr0 tables; this allocation can fail too. */
	pgd = allocator(mask);
	if (!pgd) {
		rc = -ENOMEM;
		goto out;
	}
	pgd = pgd_offset_raw(pgd, dst_addr);
	if (pgd_none(*pgd)) {
		pud = allocator(mask);
		if (!pud) {
			rc = -ENOMEM;
			goto out;
		}
		pgd_populate(&init_mm, pgd, pud);
	}

	pud = pud_offset(pgd, dst_addr);
	if (pud_none(*pud)) {
		pmd = allocator(mask);
		if (!pmd) {
			rc = -ENOMEM;
			goto out;
		}
		pud_populate(&init_mm, pud, pmd);
	}

	pmd = pmd_offset(pud, dst_addr);
	if (pmd_none(*pmd)) {
		pte = allocator(mask);
		if (!pte) {
			rc = -ENOMEM;
			goto out;
		}
		pmd_populate_kernel(&init_mm, pmd, pte);
	}

	pte = pte_offset_kernel(pmd, dst_addr);
	set_pte(pte, __pte(virt_to_phys((void *)dst) |
		pgprot_val(PAGE_KERNEL_EXEC)));

	/*
	 * Load our new page tables. A strict BBM approach requires that we
	 * ensure that TLBs are free of any entries that may overlap with the
	 * global mappings we are about to install.
	 *
	 * For a real hibernate/resume cycle TTBR0 currently points to a zero
	 * page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI
	 * runtime services), while for a userspace-driven test_resume cycle it
	 * points to userspace page tables (and we must point it at a zero page
	 * ourselves). Elsewhere we only (un)install the idmap with preemption
	 * disabled, so T0SZ should be as required regardless.
	 */
	cpu_set_reserved_ttbr0();
	local_flush_tlb_all();
	write_sysreg(virt_to_phys(pgd), ttbr0_el1);
	isb();

	*phys_dst_addr = virt_to_phys((void *)dst);

out:
	return rc;
}

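/*
 * Save the CPU context with __cpu_suspend_enter(), then snapshot memory with
 * swsusp_save(). The second return from __cpu_suspend_enter() happens via
 * cpu_resume at the end of a resume cycle: by then the image is back in
 * memory, so clean the kernel image to the PoC for secondary core startup
 * and tell the hibernate core the restore has finished.
 */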
int swsusp_arch_suspend(void)
{
	int ret = 0;
	unsigned long flags;
	struct sleep_stack_data state;

	if (cpus_are_stuck_in_kernel()) {
		pr_err("Can't hibernate: no mechanism to offline secondary CPUs.\n");
		return -EBUSY;
	}

	local_dbg_save(flags);

	if (__cpu_suspend_enter(&state)) {
		ret = swsusp_save();
	} else {
		/* Clean kernel to PoC for secondary core startup */
		__flush_dcache_area(LMADDR(KERNEL_START), KERNEL_END - KERNEL_START);

		/*
		 * Tell the hibernation core that we've just restored
		 * the memory
		 */
		in_suspend = 0;

		__cpu_suspend_exit();
	}

	local_dbg_restore(flags);

	return ret;
}

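/*
 * The copy_pte()/copy_pmd()/copy_pud() helpers below, driven by
 * copy_page_tables(), duplicate a range of the kernel page tables into
 * newly allocated 'safe' pages. Read-only attributes are dropped at each
 * level so that the restore code can write to any page, including those
 * backing the kernel text and rodata.
 */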
static int copy_pte(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long start,
		    unsigned long end)
{
	pte_t *src_pte;
	pte_t *dst_pte;
	unsigned long addr = start;

	dst_pte = (pte_t *)get_safe_page(GFP_ATOMIC);
	if (!dst_pte)
		return -ENOMEM;
	pmd_populate_kernel(&init_mm, dst_pmd, dst_pte);
	dst_pte = pte_offset_kernel(dst_pmd, start);

	src_pte = pte_offset_kernel(src_pmd, start);
	do {
		if (!pte_none(*src_pte))
			/*
			 * Resume will overwrite areas that may be marked
			 * read only (code, rodata). Clear the RDONLY bit from
			 * the temporary mappings we use during restore.
			 */
			set_pte(dst_pte, __pte(pte_val(*src_pte) & ~PTE_RDONLY));
	} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);

	return 0;
}

static int copy_pmd(pud_t *dst_pud, pud_t *src_pud, unsigned long start,
		    unsigned long end)
{
	pmd_t *src_pmd;
	pmd_t *dst_pmd;
	unsigned long next;
	unsigned long addr = start;

	if (pud_none(*dst_pud)) {
		dst_pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
		if (!dst_pmd)
			return -ENOMEM;
		pud_populate(&init_mm, dst_pud, dst_pmd);
	}
	dst_pmd = pmd_offset(dst_pud, start);

	src_pmd = pmd_offset(src_pud, start);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none(*src_pmd))
			continue;
		if (pmd_table(*src_pmd)) {
			if (copy_pte(dst_pmd, src_pmd, addr, next))
				return -ENOMEM;
		} else {
			set_pmd(dst_pmd,
				__pmd(pmd_val(*src_pmd) & ~PMD_SECT_RDONLY));
		}
	} while (dst_pmd++, src_pmd++, addr = next, addr != end);

	return 0;
}

static int copy_pud(pgd_t *dst_pgd, pgd_t *src_pgd, unsigned long start,
		    unsigned long end)
{
	pud_t *dst_pud;
	pud_t *src_pud;
	unsigned long next;
	unsigned long addr = start;

	if (pgd_none(*dst_pgd)) {
		dst_pud = (pud_t *)get_safe_page(GFP_ATOMIC);
		if (!dst_pud)
			return -ENOMEM;
		pgd_populate(&init_mm, dst_pgd, dst_pud);
	}
	dst_pud = pud_offset(dst_pgd, start);

	src_pud = pud_offset(src_pgd, start);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none(*src_pud))
			continue;
		if (pud_table(*src_pud)) {
			if (copy_pmd(dst_pud, src_pud, addr, next))
				return -ENOMEM;
		} else {
			set_pud(dst_pud,
				__pud(pud_val(*src_pud) & ~PMD_SECT_RDONLY));
		}
	} while (dst_pud++, src_pud++, addr = next, addr != end);

	return 0;
}

static int copy_page_tables(pgd_t *dst_pgd, unsigned long start,
			    unsigned long end)
{
	unsigned long next;
	unsigned long addr = start;
	pgd_t *src_pgd = pgd_offset_k(start);

	dst_pgd = pgd_offset_raw(dst_pgd, start);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none(*src_pgd))
			continue;
		if (copy_pud(dst_pgd, src_pgd, addr, next))
			return -ENOMEM;
	} while (dst_pgd++, src_pgd++, addr = next, addr != end);

	return 0;
}

/*
 * Set up, then resume from the hibernate image using
 * swsusp_arch_suspend_exit().
 *
 * Memory allocated by get_safe_page() will be dealt with by the hibernate
 * code; we don't need to free it here.
 */
int swsusp_arch_resume(void)
{
	int rc = 0;
	void *zero_page;
	size_t exit_size;
	pgd_t *tmp_pg_dir;
	void *lm_restore_pblist;
	phys_addr_t phys_hibernate_exit;
	void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *,
					  void *, phys_addr_t, phys_addr_t);

	/*
	 * Restoring the memory image will overwrite the ttbr1 page tables.
	 * Create a second copy of just the linear map, and use this when
	 * restoring.
	 */
	tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC);
	if (!tmp_pg_dir) {
		pr_err("Failed to allocate memory for temporary page tables.\n");
		rc = -ENOMEM;
		goto out;
	}
	rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, 0);
	if (rc)
		goto out;

	/*
	 * Since we only copied the linear map, we need to find restore_pblist's
	 * linear map address.
	 */
	lm_restore_pblist = LMADDR(restore_pblist);

	/*
	 * We need a zero page that is zero before & after resume in order to
	 * break before make on the ttbr1 page tables.
	 */
	zero_page = (void *)get_safe_page(GFP_ATOMIC);
	if (!zero_page) {
		pr_err("Failed to allocate zero page.\n");
		rc = -ENOMEM;
		goto out;
	}

	/*
	 * Locate the exit code in the bottom-but-one page, so that *NULL
	 * still has disastrous effects.
	 */
	hibernate_exit = (void *)PAGE_SIZE;
	exit_size = __hibernate_exit_text_end - __hibernate_exit_text_start;
	/*
	 * Copy swsusp_arch_suspend_exit() to a safe page. This will generate
	 * a new set of ttbr0 page tables and load them.
	 */
	rc = create_safe_exec_page(__hibernate_exit_text_start, exit_size,
				   (unsigned long)hibernate_exit,
				   &phys_hibernate_exit,
				   (void *)get_safe_page, GFP_ATOMIC);
	if (rc) {
		pr_err("Failed to create safe executable page for hibernate_exit code.\n");
		goto out;
	}

	/*
	 * The hibernate exit text contains a set of el2 vectors that will
	 * be executed at el2 with the mmu off in order to reload the hyp-stub.
	 */
	__flush_dcache_area(hibernate_exit, exit_size);

	/*
	 * KASLR will cause the el2 vectors to be in a different location in
	 * the resumed kernel. Load hibernate's temporary copy into el2.
	 *
	 * We can skip this step if we booted at EL1, or are running with VHE.
	 */
	if (el2_reset_needed()) {
		phys_addr_t el2_vectors = phys_hibernate_exit;	/* base */
		el2_vectors += hibernate_el2_vectors -
			       __hibernate_exit_text_start;	/* offset */

		__hyp_set_vectors(el2_vectors);
	}

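	/*
	 * hibernate_exit() is the copy of swsusp_arch_suspend_exit() made by
	 * create_safe_exec_page(): it restores the pages on lm_restore_pblist
	 * using the temporary linear-map copy, installs the saved ttbr1_el1
	 * tables, then re-enters the image kernel via reenter_kernel
	 * (_cpu_resume). It does not return.
	 */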
	hibernate_exit(virt_to_phys(tmp_pg_dir), resume_hdr.ttbr1_el1,
		       resume_hdr.reenter_kernel, lm_restore_pblist,
		       resume_hdr.__hyp_stub_vectors, virt_to_phys(zero_page));

out:
	return rc;
}

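/*
 * Hibernate resumes on the boot CPU; refuse to start hibernation if CPU0
 * has been hot-unplugged, as we would not be able to resume on it.
 */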
static int check_boot_cpu_online_pm_callback(struct notifier_block *nb,
					     unsigned long action, void *ptr)
{
	if (action == PM_HIBERNATION_PREPARE &&
	    cpumask_first(cpu_online_mask) != 0) {
		pr_warn("CPU0 is offline.\n");
		return notifier_from_errno(-ENODEV);
	}

	return NOTIFY_OK;
}

static int __init check_boot_cpu_online_init(void)
{
	/*
	 * Set this pm_notifier callback with a lower priority than
	 * cpu_hotplug_pm_callback, so that cpu_hotplug_pm_callback will be
	 * called earlier to disable cpu hotplug before the cpu online check.
	 */
	pm_notifier(check_boot_cpu_online_pm_callback, -INT_MAX);

	return 0;
}
core_initcall(check_boot_cpu_online_init);