// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2018 Chen-Yu Tsai
 *
 * Chen-Yu Tsai <wens@csie.org>
 *
 * arch/arm/mach-sunxi/mc_smp.c
 *
 * Based on Allwinner code, arch/arm/mach-exynos/mcpm-exynos.c, and
 * arch/arm/mach-hisi/platmcpm.c
 * Cluster cache enable trampoline code adapted from MCPM framework
 */

#include <linux/arm-cci.h>
#include <linux/cpu_pm.h>
#include <linux/delay.h>
#include <linux/io.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/smp.h>

#include <asm/cacheflush.h>
#include <asm/cp15.h>
#include <asm/cputype.h>
#include <asm/idmap.h>
#include <asm/smp_plat.h>
#include <asm/suspend.h>

#define SUNXI_CPUS_PER_CLUSTER		4
#define SUNXI_NR_CLUSTERS		2

#define CPUCFG_CX_CTRL_REG0(c)			(0x10 * (c))
#define CPUCFG_CX_CTRL_REG0_L1_RST_DISABLE(n)	BIT(n)
#define CPUCFG_CX_CTRL_REG0_L1_RST_DISABLE_ALL	0xf
#define CPUCFG_CX_CTRL_REG0_L2_RST_DISABLE_A7	BIT(4)
#define CPUCFG_CX_CTRL_REG0_L2_RST_DISABLE_A15	BIT(0)
#define CPUCFG_CX_CTRL_REG1(c)			(0x10 * (c) + 0x4)
#define CPUCFG_CX_CTRL_REG1_ACINACTM		BIT(0)
#define CPUCFG_CX_RST_CTRL(c)			(0x80 + 0x4 * (c))
#define CPUCFG_CX_RST_CTRL_DBG_SOC_RST		BIT(24)
#define CPUCFG_CX_RST_CTRL_ETM_RST(n)		BIT(20 + (n))
#define CPUCFG_CX_RST_CTRL_ETM_RST_ALL		(0xf << 20)
#define CPUCFG_CX_RST_CTRL_DBG_RST(n)		BIT(16 + (n))
#define CPUCFG_CX_RST_CTRL_DBG_RST_ALL		(0xf << 16)
#define CPUCFG_CX_RST_CTRL_H_RST		BIT(12)
#define CPUCFG_CX_RST_CTRL_L2_RST		BIT(8)
#define CPUCFG_CX_RST_CTRL_CX_RST(n)		BIT(4 + (n))
#define CPUCFG_CX_RST_CTRL_CORE_RST(n)		BIT(n)

#define PRCM_CPU_PO_RST_CTRL(c)		(0x4 + 0x4 * (c))
#define PRCM_CPU_PO_RST_CTRL_CORE(n)	BIT(n)
#define PRCM_CPU_PO_RST_CTRL_CORE_ALL	0xf
#define PRCM_PWROFF_GATING_REG(c)	(0x100 + 0x4 * (c))
#define PRCM_PWROFF_GATING_REG_CLUSTER	BIT(4)
#define PRCM_PWROFF_GATING_REG_CORE(n)	BIT(n)
#define PRCM_PWR_SWITCH_REG(c, cpu)	(0x140 + 0x10 * (c) + 0x4 * (cpu))
#define PRCM_CPU_SOFT_ENTRY_REG		0x164

static void __iomem *cpucfg_base;
static void __iomem *prcm_base;

static bool sunxi_core_is_cortex_a15(unsigned int core, unsigned int cluster)
{
	struct device_node *node;
	int cpu = cluster * SUNXI_CPUS_PER_CLUSTER + core;

	node = of_cpu_device_node_get(cpu);

	/* In case of_cpu_device_node_get fails */
	if (!node)
		node = of_get_cpu_node(cpu, NULL);

	if (!node) {
		/*
		 * There's no point in returning an error, since we
		 * would be mid way in a core or cluster power sequence.
		 */
		pr_err("%s: Couldn't get CPU cluster %u core %u device node\n",
		       __func__, cluster, core);

		return false;
	}

	return of_device_is_compatible(node, "arm,cortex-a15");
}

static int sunxi_cpu_power_switch_set(unsigned int cpu, unsigned int cluster,
				      bool enable)
{
	u32 reg;

	/* control sequence from Allwinner A80 user manual v1.2 PRCM section */
	reg = readl(prcm_base + PRCM_PWR_SWITCH_REG(cluster, cpu));
	if (enable) {
		if (reg == 0x00) {
			pr_debug("power clamp for cluster %u cpu %u already open\n",
				 cluster, cpu);
			return 0;
		}

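		/*
		 * Release the power clamp in stages (0xff -> 0xfe -> 0xf8
		 * -> 0xf0 -> 0x00) with a pause between writes, presumably
		 * to limit in-rush current while the core's rail settles.
		 */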
		writel(0xff, prcm_base + PRCM_PWR_SWITCH_REG(cluster, cpu));
		udelay(10);
		writel(0xfe, prcm_base + PRCM_PWR_SWITCH_REG(cluster, cpu));
		udelay(10);
		writel(0xf8, prcm_base + PRCM_PWR_SWITCH_REG(cluster, cpu));
		udelay(10);
		writel(0xf0, prcm_base + PRCM_PWR_SWITCH_REG(cluster, cpu));
		udelay(10);
		writel(0x00, prcm_base + PRCM_PWR_SWITCH_REG(cluster, cpu));
		udelay(10);
	} else {
		writel(0xff, prcm_base + PRCM_PWR_SWITCH_REG(cluster, cpu));
		udelay(10);
	}

	return 0;
}

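/*
 * Power sequence for a single core: hold the core in power-on reset,
 * assert its debug/ETM resets, open its power switch and clear its power
 * gate, then release the resets so the core starts executing from the
 * PRCM soft entry address.
 */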
static int sunxi_cpu_powerup(unsigned int cpu, unsigned int cluster)
{
	u32 reg;

	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
	if (cpu >= SUNXI_CPUS_PER_CLUSTER || cluster >= SUNXI_NR_CLUSTERS)
		return -EINVAL;

	/* assert processor power-on reset */
	reg = readl(prcm_base + PRCM_CPU_PO_RST_CTRL(cluster));
	reg &= ~PRCM_CPU_PO_RST_CTRL_CORE(cpu);
	writel(reg, prcm_base + PRCM_CPU_PO_RST_CTRL(cluster));

	/* Cortex-A7: hold L1 reset disable signal low */
	if (!sunxi_core_is_cortex_a15(cpu, cluster)) {
		reg = readl(cpucfg_base + CPUCFG_CX_CTRL_REG0(cluster));
		reg &= ~CPUCFG_CX_CTRL_REG0_L1_RST_DISABLE(cpu);
		writel(reg, cpucfg_base + CPUCFG_CX_CTRL_REG0(cluster));
	}

	/* assert processor related resets */
	reg = readl(cpucfg_base + CPUCFG_CX_RST_CTRL(cluster));
	reg &= ~CPUCFG_CX_RST_CTRL_DBG_RST(cpu);

	/*
	 * Allwinner code also asserts resets for NEON on A15. According
	 * to ARM manuals, asserting power-on reset is sufficient.
	 */
	if (!sunxi_core_is_cortex_a15(cpu, cluster))
		reg &= ~CPUCFG_CX_RST_CTRL_ETM_RST(cpu);

	writel(reg, cpucfg_base + CPUCFG_CX_RST_CTRL(cluster));

	/* open power switch */
	sunxi_cpu_power_switch_set(cpu, cluster, true);

	/* clear processor power gate */
	reg = readl(prcm_base + PRCM_PWROFF_GATING_REG(cluster));
	reg &= ~PRCM_PWROFF_GATING_REG_CORE(cpu);
	writel(reg, prcm_base + PRCM_PWROFF_GATING_REG(cluster));
	udelay(20);

	/* de-assert processor power-on reset */
	reg = readl(prcm_base + PRCM_CPU_PO_RST_CTRL(cluster));
	reg |= PRCM_CPU_PO_RST_CTRL_CORE(cpu);
	writel(reg, prcm_base + PRCM_CPU_PO_RST_CTRL(cluster));

	/* de-assert all processor resets */
	reg = readl(cpucfg_base + CPUCFG_CX_RST_CTRL(cluster));
	reg |= CPUCFG_CX_RST_CTRL_DBG_RST(cpu);
	reg |= CPUCFG_CX_RST_CTRL_CORE_RST(cpu);
	if (!sunxi_core_is_cortex_a15(cpu, cluster))
		reg |= CPUCFG_CX_RST_CTRL_ETM_RST(cpu);
	else
		reg |= CPUCFG_CX_RST_CTRL_CX_RST(cpu);	/* NEON */
	writel(reg, cpucfg_base + CPUCFG_CX_RST_CTRL(cluster));

	return 0;
}

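/*
 * Power sequence for a whole cluster: quiesce the cluster's AXI interface
 * (ACINACTM), hold all cores and cluster-level blocks in reset, set up the
 * L1/L2 reset-disable signals for the core type, ungate cluster power, then
 * release the cluster-level resets and ACINACTM.
 */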
static int sunxi_cluster_powerup(unsigned int cluster)
{
	u32 reg;

	pr_debug("%s: cluster %u\n", __func__, cluster);
	if (cluster >= SUNXI_NR_CLUSTERS)
		return -EINVAL;

	/* assert ACINACTM */
	reg = readl(cpucfg_base + CPUCFG_CX_CTRL_REG1(cluster));
	reg |= CPUCFG_CX_CTRL_REG1_ACINACTM;
	writel(reg, cpucfg_base + CPUCFG_CX_CTRL_REG1(cluster));

	/* assert cluster processor power-on resets */
	reg = readl(prcm_base + PRCM_CPU_PO_RST_CTRL(cluster));
	reg &= ~PRCM_CPU_PO_RST_CTRL_CORE_ALL;
	writel(reg, prcm_base + PRCM_CPU_PO_RST_CTRL(cluster));

	/* assert cluster resets */
	reg = readl(cpucfg_base + CPUCFG_CX_RST_CTRL(cluster));
	reg &= ~CPUCFG_CX_RST_CTRL_DBG_SOC_RST;
	reg &= ~CPUCFG_CX_RST_CTRL_DBG_RST_ALL;
	reg &= ~CPUCFG_CX_RST_CTRL_H_RST;
	reg &= ~CPUCFG_CX_RST_CTRL_L2_RST;

	/*
	 * Allwinner code also asserts resets for NEON on A15. According
	 * to ARM manuals, asserting power-on reset is sufficient.
	 */
	if (!sunxi_core_is_cortex_a15(0, cluster))
		reg &= ~CPUCFG_CX_RST_CTRL_ETM_RST_ALL;

	writel(reg, cpucfg_base + CPUCFG_CX_RST_CTRL(cluster));

	/* hold L1/L2 reset disable signals low */
	reg = readl(cpucfg_base + CPUCFG_CX_CTRL_REG0(cluster));
	if (sunxi_core_is_cortex_a15(0, cluster)) {
		/* Cortex-A15: hold L2RSTDISABLE low */
		reg &= ~CPUCFG_CX_CTRL_REG0_L2_RST_DISABLE_A15;
	} else {
		/* Cortex-A7: hold L1RSTDISABLE and L2RSTDISABLE low */
		reg &= ~CPUCFG_CX_CTRL_REG0_L1_RST_DISABLE_ALL;
		reg &= ~CPUCFG_CX_CTRL_REG0_L2_RST_DISABLE_A7;
	}
	writel(reg, cpucfg_base + CPUCFG_CX_CTRL_REG0(cluster));

	/* clear cluster power gate */
	reg = readl(prcm_base + PRCM_PWROFF_GATING_REG(cluster));
	reg &= ~PRCM_PWROFF_GATING_REG_CLUSTER;
	writel(reg, prcm_base + PRCM_PWROFF_GATING_REG(cluster));
	udelay(20);

	/* de-assert cluster resets */
	reg = readl(cpucfg_base + CPUCFG_CX_RST_CTRL(cluster));
	reg |= CPUCFG_CX_RST_CTRL_DBG_SOC_RST;
	reg |= CPUCFG_CX_RST_CTRL_H_RST;
	reg |= CPUCFG_CX_RST_CTRL_L2_RST;
	writel(reg, cpucfg_base + CPUCFG_CX_RST_CTRL(cluster));

	/* de-assert ACINACTM */
	reg = readl(cpucfg_base + CPUCFG_CX_CTRL_REG1(cluster));
	reg &= ~CPUCFG_CX_CTRL_REG1_ACINACTM;
	writel(reg, cpucfg_base + CPUCFG_CX_CTRL_REG1(cluster));

	return 0;
}

/*
 * This bit is shared between the initial nocache_trampoline call to
 * enable CCI-400 and proper cluster cache disable before power down.
 */
static void sunxi_cluster_cache_disable_without_axi(void)
{
	if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A15) {
		/*
		 * On the Cortex-A15 we need to disable
		 * L2 prefetching before flushing the cache.
		 */
		asm volatile(
		"mcr p15, 1, %0, c15, c0, 3\n"
		"isb\n"
		"dsb"
		: : "r" (0x400));
	}

	/* Flush all cache levels for this cluster. */
	v7_exit_coherency_flush(all);

	/*
	 * Disable cluster-level coherency by masking
	 * incoming snoops and DVM messages:
	 */
	cci_disable_port_by_cpu(read_cpuid_mpidr());
}

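/*
 * Per-core "up" counts, used to tell when a whole cluster is down, and a
 * flag marking whether the CPU currently being brought up is the first one
 * in its cluster (read by the cache enable trampoline with the MMU off).
 */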
static int sunxi_mc_smp_cpu_table[SUNXI_NR_CLUSTERS][SUNXI_CPUS_PER_CLUSTER];
static int sunxi_mc_smp_first_comer;

/*
 * Enable cluster-level coherency, in preparation for turning on the MMU.
 *
 * Also enable regional clock gating and L2 data latency settings for
 * Cortex-A15. These settings are from the vendor kernel.
 */
static void __naked sunxi_mc_smp_cluster_cache_enable(void)
{
	asm volatile (
		"mrc p15, 0, r1, c0, c0, 0\n"
		"movw r2, #" __stringify(ARM_CPU_PART_MASK & 0xffff) "\n"
		"movt r2, #" __stringify(ARM_CPU_PART_MASK >> 16) "\n"
		"and r1, r1, r2\n"
		"movw r2, #" __stringify(ARM_CPU_PART_CORTEX_A15 & 0xffff) "\n"
		"movt r2, #" __stringify(ARM_CPU_PART_CORTEX_A15 >> 16) "\n"
		"cmp r1, r2\n"
		"bne not_a15\n"

		/* The following is Cortex-A15 specific */

		/* ACTLR2: Enable CPU regional clock gates */
		"mrc p15, 1, r1, c15, c0, 4\n"
		"orr r1, r1, #(0x1<<31)\n"
		"mcr p15, 1, r1, c15, c0, 4\n"

		/* L2ACTLR */
		"mrc p15, 1, r1, c15, c0, 0\n"
		/* Enable L2, GIC, and Timer regional clock gates */
		"orr r1, r1, #(0x1<<26)\n"
		/* Disable clean/evict from being pushed to external */
		"orr r1, r1, #(0x1<<3)\n"
		"mcr p15, 1, r1, c15, c0, 0\n"

		/* L2CTRL: L2 data RAM latency */
		"mrc p15, 1, r1, c9, c0, 2\n"
		"bic r1, r1, #(0x7<<0)\n"
		"orr r1, r1, #(0x3<<0)\n"
		"mcr p15, 1, r1, c9, c0, 2\n"

		/* End of Cortex-A15 specific setup */
		"not_a15:\n"

		/* Get value of sunxi_mc_smp_first_comer */
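		/*
		 * "first" below holds the offset from itself to
		 * sunxi_mc_smp_first_comer, so the flag can be loaded
		 * PC-relative while the MMU is still off.
		 */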
		"adr r1, first\n"
		"ldr r0, [r1]\n"
		"ldr r0, [r1, r0]\n"

		/* Skip cci_enable_port_for_self if not first comer */
		"cmp r0, #0\n"
		"bxeq lr\n"
		"b cci_enable_port_for_self\n"

		".align 2\n"
		"first: .word sunxi_mc_smp_first_comer - .\n"
	);
}

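/*
 * Entry point for secondary cores, installed in PRCM_CPU_SOFT_ENTRY_REG:
 * run the cluster cache enable trampoline (which also enables the CCI port
 * if this is the first core up in the cluster) before branching to the
 * generic secondary_startup.
 */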
static void __naked sunxi_mc_smp_secondary_startup(void)
{
	asm volatile(
		"bl sunxi_mc_smp_cluster_cache_enable\n"
		"b secondary_startup"
		/* Let compiler know about sunxi_mc_smp_cluster_cache_enable */
		:: "i" (sunxi_mc_smp_cluster_cache_enable)
	);
}

static DEFINE_SPINLOCK(boot_lock);

static bool sunxi_mc_smp_cluster_is_down(unsigned int cluster)
{
	int i;

	for (i = 0; i < SUNXI_CPUS_PER_CLUSTER; i++)
		if (sunxi_mc_smp_cpu_table[cluster][i])
			return false;
	return true;
}

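/*
 * Map the logical CPU to its (cluster, core) pair, power the cluster up
 * first if this is the first core to come up in it, then power up the core
 * itself. Serialized against concurrent boots by boot_lock.
 */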
static int sunxi_mc_smp_boot_secondary(unsigned int l_cpu, struct task_struct *idle)
{
	unsigned int mpidr, cpu, cluster;

	mpidr = cpu_logical_map(l_cpu);
	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);

	if (!cpucfg_base)
		return -ENODEV;
	if (cluster >= SUNXI_NR_CLUSTERS || cpu >= SUNXI_CPUS_PER_CLUSTER)
		return -EINVAL;

	spin_lock_irq(&boot_lock);

	if (sunxi_mc_smp_cpu_table[cluster][cpu])
		goto out;

	if (sunxi_mc_smp_cluster_is_down(cluster)) {
		sunxi_mc_smp_first_comer = true;
		sunxi_cluster_powerup(cluster);
	} else {
		sunxi_mc_smp_first_comer = false;
	}

	/* This is read by incoming CPUs with their cache and MMU disabled */
	sync_cache_w(&sunxi_mc_smp_first_comer);
	sunxi_cpu_powerup(cpu, cluster);

out:
	sunxi_mc_smp_cpu_table[cluster][cpu]++;
	spin_unlock_irq(&boot_lock);

	return 0;
}

static const struct smp_operations sunxi_mc_smp_smp_ops __initconst = {
	.smp_boot_secondary	= sunxi_mc_smp_boot_secondary,
};

static bool __init sunxi_mc_smp_cpu_table_init(void)
{
	unsigned int mpidr, cpu, cluster;

	mpidr = read_cpuid_mpidr();
	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);

	if (cluster >= SUNXI_NR_CLUSTERS || cpu >= SUNXI_CPUS_PER_CLUSTER) {
		pr_err("%s: boot CPU is out of bounds!\n", __func__);
		return false;
	}
	sunxi_mc_smp_cpu_table[cluster][cpu] = 1;
	return true;
}

/*
 * Adapted from arch/arm/common/mc_smp_entry.c
 *
 * We need the trampoline code to enable CCI-400 on the first cluster
 */
typedef typeof(cpu_reset) phys_reset_t;

static void __init __naked sunxi_mc_smp_resume(void)
{
	asm volatile(
		"bl sunxi_mc_smp_cluster_cache_enable\n"
		"b cpu_resume"
		/* Let compiler know about sunxi_mc_smp_cluster_cache_enable */
		:: "i" (sunxi_mc_smp_cluster_cache_enable)
	);
}

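/*
 * Called through cpu_suspend() on the boot CPU: switch to the idmap page
 * tables, flush and disable this cluster's caches and CCI port, then
 * soft-restart through cpu_reset() into sunxi_mc_smp_resume so the cache
 * enable trampoline runs with the MMU off.
 */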
static int __init nocache_trampoline(unsigned long __unused)
{
	phys_reset_t phys_reset;

	setup_mm_for_reboot();
	sunxi_cluster_cache_disable_without_axi();

	phys_reset = (phys_reset_t)(unsigned long)__pa_symbol(cpu_reset);
	phys_reset(__pa_symbol(sunxi_mc_smp_resume), false);
	BUG();
}

static int __init sunxi_mc_smp_loopback(void)
{
	int ret;

	/*
	 * We're going to soft-restart the current CPU through the
	 * low-level MCPM code by leveraging the suspend/resume
	 * infrastructure. Let's play it safe by using cpu_pm_enter()
	 * in case the CPU init code path resets the VFP or similar.
	 */
	sunxi_mc_smp_first_comer = true;
	local_irq_disable();
	local_fiq_disable();
	ret = cpu_pm_enter();
	if (!ret) {
		ret = cpu_suspend(0, nocache_trampoline);
		cpu_pm_exit();
	}
	local_fiq_enable();
	local_irq_enable();
	sunxi_mc_smp_first_comer = false;

	return ret;
}

static int __init sunxi_mc_smp_init(void)
{
	struct device_node *cpucfg_node, *node;
	struct resource res;
	int ret;

	if (!of_machine_is_compatible("allwinner,sun9i-a80"))
		return -ENODEV;

	if (!sunxi_mc_smp_cpu_table_init())
		return -EINVAL;

	if (!cci_probed()) {
		pr_err("%s: CCI-400 not available\n", __func__);
		return -ENODEV;
	}

	node = of_find_compatible_node(NULL, NULL, "allwinner,sun9i-a80-prcm");
	if (!node) {
		pr_err("%s: PRCM not available\n", __func__);
		return -ENODEV;
	}

	/*
	 * Unfortunately we can not request the I/O region for the PRCM.
	 * It is shared with the PRCM clock.
	 */
	prcm_base = of_iomap(node, 0);
	of_node_put(node);
	if (!prcm_base) {
		pr_err("%s: failed to map PRCM registers\n", __func__);
		return -ENOMEM;
	}

	cpucfg_node = of_find_compatible_node(NULL, NULL,
					      "allwinner,sun9i-a80-cpucfg");
	if (!cpucfg_node) {
		ret = -ENODEV;
		pr_err("%s: CPUCFG not available\n", __func__);
		goto err_unmap_prcm;
	}

	cpucfg_base = of_io_request_and_map(cpucfg_node, 0, "sunxi-mc-smp");
	if (IS_ERR(cpucfg_base)) {
		ret = PTR_ERR(cpucfg_base);
		pr_err("%s: failed to map CPUCFG registers: %d\n",
		       __func__, ret);
		goto err_put_cpucfg_node;
	}

	/* Configure CCI-400 for boot cluster */
	ret = sunxi_mc_smp_loopback();
	if (ret) {
		pr_err("%s: failed to configure boot cluster: %d\n",
		       __func__, ret);
		goto err_unmap_release_cpucfg;
	}

	/* We don't need the CPUCFG device node anymore */
	of_node_put(cpucfg_node);

	/* Set the hardware entry point address */
	writel(__pa_symbol(sunxi_mc_smp_secondary_startup),
	       prcm_base + PRCM_CPU_SOFT_ENTRY_REG);

	/* Actually enable multi cluster SMP */
	smp_set_ops(&sunxi_mc_smp_smp_ops);

	pr_info("sunxi multi cluster SMP support installed\n");

	return 0;

err_unmap_release_cpucfg:
	iounmap(cpucfg_base);
	of_address_to_resource(cpucfg_node, 0, &res);
	release_mem_region(res.start, resource_size(&res));
err_put_cpucfg_node:
	of_node_put(cpucfg_node);
err_unmap_prcm:
	iounmap(prcm_base);
	return ret;
}

early_initcall(sunxi_mc_smp_init);