/*
 * Copyright 2007-2008 Analog Devices Inc.
 *              Philippe Gerum <rpm@xenomai.org>
 *
 * Licensed under the GPL-2 or later.
 */

#include <linux/linkage.h>
#include <asm/blackfin.h>
#include <asm/cache.h>
#include <asm/asm-offsets.h>
#include <asm/rwlock.h>
#include <asm/cplb.h>

.text

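/*
 * The cores serialize on a single lock word (_corelock): it is taken
 * with the TESTSET instruction and released by writing 0 to it (see
 * _put_core_lock).  coreslot_loadaddr loads its address into the
 * given P register.
 */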
.macro coreslot_loadaddr reg:req
	\reg\().l = _corelock;
	\reg\().h = _corelock;
.endm

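/*
 * Issue TESTSET on the given address.  When anomaly 05000477 applies,
 * interrupts are disabled around the TESTSET (using \scratch to hold
 * the saved mask) so the test-and-set cannot be disturbed.
 */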
.macro safe_testset addr:req, scratch:req
#if ANOMALY_05000477
	cli \scratch;
	testset (\addr);
	sti \scratch;
#else
	testset (\addr);
#endif
.endm

/*
 * r0 = address of atomic data to flush and invalidate (32bit).
 *
 * Clear interrupts and return the old mask.
 * We assume that no atomic data can span cachelines.
 *
 * Clobbers: r2:0, p0
 */
ENTRY(_get_core_lock)
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;
	cli r0;
	coreslot_loadaddr p0;
.Lretry_corelock:
	safe_testset p0, r2;
	if cc jump .Ldone_corelock;
	SSYNC(r2);
	jump .Lretry_corelock
.Ldone_corelock:
	p0 = r1;
	/* flush core internal write buffer before invalidate dcache */
	CSYNC(r2);
	flushinv[p0];
	SSYNC(r2);
	rts;
ENDPROC(_get_core_lock)

/*
 * r0 = address of atomic data in uncacheable memory region (32bit).
 *
 * Clear interrupts and return the old mask.
 *
 * Clobbers: r0, p0
 */
ENTRY(_get_core_lock_noflush)
	cli r0;
	coreslot_loadaddr p0;
.Lretry_corelock_noflush:
	safe_testset p0, r2;
	if cc jump .Ldone_corelock_noflush;
	SSYNC(r2);
	jump .Lretry_corelock_noflush
.Ldone_corelock_noflush:
	/*
	 * SMP kgdb runs into dead loop without NOP here, when one core
	 * single steps over get_core_lock_noflush and the other executes
	 * get_core_lock as a slave node.
	 */
	nop;
	CSYNC(r2);
	rts;
ENDPROC(_get_core_lock_noflush)

/*
 * r0 = interrupt mask to restore.
 * r1 = address of atomic data to flush and invalidate (32bit).
 *
 * Interrupts are masked on entry (see _get_core_lock).
 * Clobbers: r2:0, p0
 */
ENTRY(_put_core_lock)
	/* Write-through cache assumed, so no flush needed here. */
	coreslot_loadaddr p0;
	r1 = 0;
	[p0] = r1;
	SSYNC(r2);
	sti r0;
	rts;
ENDPROC(_put_core_lock)

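/*
 * With __ARCH_SYNC_CORE_DCACHE the per-core D-caches are kept coherent
 * in software: _barrier_mask records which cores still have to
 * resynchronize their D-cache at the next barrier check, and lock
 * words carry per-CPU "fingerprints" in their high nibble so that a
 * core taking a lock can tell whether it must call _resync_core_dcache
 * first.
 */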
#ifdef __ARCH_SYNC_CORE_DCACHE

ENTRY(___raw_smp_mark_barrier_asm)
	[--sp] = rets;
	[--sp] = ( r7:5 );
	[--sp] = r0;
	[--sp] = p1;
	[--sp] = p0;
	call _get_core_lock_noflush;

	/*
	 * Calculate current core mask
	 */
	GET_CPUID(p1, r7);
	r6 = 1;
	r6 <<= r7;

	/*
	 * Set bit of other cores in barrier mask. Don't change current core bit.
	 */
	p1.l = _barrier_mask;
	p1.h = _barrier_mask;
	r7 = [p1];
	r5 = r7 & r6;
	r7 = ~r6;
	cc = r5 == 0;
	if cc jump 1f;
	r7 = r7 | r6;
1:
	[p1] = r7;
	SSYNC(r2);

	call _put_core_lock;
	p0 = [sp++];
	p1 = [sp++];
	r0 = [sp++];
	( r7:5 ) = [sp++];
	rets = [sp++];
	rts;
ENDPROC(___raw_smp_mark_barrier_asm)

ENTRY(___raw_smp_check_barrier_asm)
	[--sp] = rets;
	[--sp] = ( r7:5 );
	[--sp] = r0;
	[--sp] = p1;
	[--sp] = p0;
	call _get_core_lock_noflush;

	/*
	 * Calculate current core mask
	 */
	GET_CPUID(p1, r7);
	r6 = 1;
	r6 <<= r7;

	/*
	 * Clear current core bit in barrier mask if it is set.
	 */
	p1.l = _barrier_mask;
	p1.h = _barrier_mask;
	r7 = [p1];
	r5 = r7 & r6;
	cc = r5 == 0;
	if cc jump 1f;
	r6 = ~r6;
	r7 = r7 & r6;
	[p1] = r7;
	SSYNC(r2);

	call _put_core_lock;

	/*
	 * Invalidate the entire D-cache of current core.
	 */
	sp += -12;
	call _resync_core_dcache
	sp += 12;
	jump 2f;
1:
	call _put_core_lock;
2:
	p0 = [sp++];
	p1 = [sp++];
	r0 = [sp++];
	( r7:5 ) = [sp++];
	rets = [sp++];
	rts;
ENDPROC(___raw_smp_check_barrier_asm)

/*
 * r0 = irqflags
 * r1 = address of atomic data
 *
 * Clobbers: r2:0, p1:0
 */
_start_lock_coherent:

	[--sp] = rets;
	[--sp] = ( r7:6 );
	r7 = r0;
	p1 = r1;

	/*
	 * Determine whether the atomic data was previously
	 * owned by another CPU (=r6).
	 */
	GET_CPUID(p0, r2);
	r1 = 1;
	r1 <<= r2;
	r2 = ~r1;

	r1 = [p1];
	r1 >>= 28;	/* CPU fingerprints are stored in the high nibble. */
	r6 = r1 & r2;
	r1 = [p1];
	r1 <<= 4;
	r1 >>= 4;
	[p1] = r1;

	/*
	 * Release the core lock now, but keep IRQs disabled while we are
	 * performing the remaining housekeeping chores for the current CPU.
	 */
	coreslot_loadaddr p0;
	r1 = 0;
	[p0] = r1;

	/*
	 * If another CPU has owned the same atomic section before us,
	 * then our D-cached copy of the shared data protected by the
	 * current spin/write_lock may be obsolete.
	 */
	cc = r6 == 0;
	if cc jump .Lcache_synced

	/*
	 * Invalidate the entire D-cache of the current core.
	 */
	sp += -12;
	call _resync_core_dcache
	sp += 12;

.Lcache_synced:
	SSYNC(r2);
	sti r7;
	( r7:6 ) = [sp++];
	rets = [sp++];
	rts

/*
 * r0 = irqflags
 * r1 = address of atomic data
 *
 * Clobbers: r2:0, p1:0
 */
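/*
 * Stamp this core's fingerprint bit (bit 28 + cpuid) into the atomic
 * word so that the next owner knows its cached copy may be stale,
 * then release the core lock.
 */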
_end_lock_coherent:

	p1 = r1;
	GET_CPUID(p0, r2);
	r2 += 28;
	r1 = 1;
	r1 <<= r2;
	r2 = [p1];
	r2 = r1 | r2;
	[p1] = r2;
	r1 = p1;
	jump _put_core_lock;

#endif /* __ARCH_SYNC_CORE_DCACHE */

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_is_locked_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r3 = [p1];
	cc = bittst( r3, 0 );
	r3 = cc;
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	r0 = r3;
	rts;
ENDPROC(___raw_spin_is_locked_asm)

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_lock_asm)
	p1 = r0;
	[--sp] = rets;
.Lretry_spinlock:
	call _get_core_lock;
	r1 = p1;
	r2 = [p1];
	cc = bittst( r2, 0 );
	if cc jump .Lbusy_spinlock
#ifdef __ARCH_SYNC_CORE_DCACHE
	r3 = p1;
	bitset ( r2, 0 ); /* Raise the lock bit. */
	[p1] = r2;
	call _start_lock_coherent
#else
	r2 = 1;
	[p1] = r2;
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lbusy_spinlock:
	/* We don't touch the atomic area if busy, so that flush
	   will behave like nop in _put_core_lock. */
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;
	jump .Lretry_spinlock
ENDPROC(___raw_spin_lock_asm)

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_trylock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = p1;
	r3 = [p1];
	cc = bittst( r3, 0 );
	if cc jump .Lfailed_trylock
#ifdef __ARCH_SYNC_CORE_DCACHE
	bitset ( r3, 0 ); /* Raise the lock bit. */
	[p1] = r3;
	call _start_lock_coherent
#else
	r2 = 1;
	[p1] = r2;
	call _put_core_lock;
#endif
	r0 = 1;
	rets = [sp++];
	rts;
.Lfailed_trylock:
	call _put_core_lock;
	r0 = 0;
	rets = [sp++];
	rts;
ENDPROC(___raw_spin_trylock_asm)

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_spin_unlock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	bitclr ( r2, 0 );
	[p1] = r2;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _end_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;
ENDPROC(___raw_spin_unlock_asm)

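/*
 * rwlock encoding: the lock word starts out at RW_LOCK_BIAS
 * (asm/rwlock.h); each reader decrements it, and a writer subtracts
 * the whole bias, so the count part must drop to zero.  Under
 * __ARCH_SYNC_CORE_DCACHE the high nibble additionally carries the
 * CPU fingerprints described above.
 */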
/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_read_lock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
.Lrdlock_try:
	r1 = [p1];
	r1 += -1;
	[p1] = r1;
	cc = r1 < 0;
	if cc jump .Lrdlock_failed
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lrdlock_failed:
	r1 += 1;
	[p1] = r1;
.Lrdlock_wait:
	r1 = p1;
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;
	call _get_core_lock;
	r1 = [p1];
	cc = r1 < 2;
	if cc jump .Lrdlock_wait;
	jump .Lrdlock_try
ENDPROC(___raw_read_lock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_read_trylock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	cc = r1 <= 0;
	if cc jump .Lfailed_tryrdlock;
	r1 += -1;
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	r0 = 1;
	rts;
.Lfailed_tryrdlock:
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	r0 = 0;
	rts;
ENDPROC(___raw_read_trylock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Note: Processing controlled by a reader lock should not have
 * any side-effect on cache issues with the other core, so we
 * just release the core lock and exit (no _end_lock_coherent).
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_read_unlock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	r1 += 1;
	[p1] = r1;
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	rts;
ENDPROC(___raw_read_unlock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_lock_asm)
	p1 = r0;
	r3.l = lo(RW_LOCK_BIAS);
	r3.h = hi(RW_LOCK_BIAS);
	[--sp] = rets;
	call _get_core_lock;
.Lwrlock_try:
	r1 = [p1];
	r1 = r1 - r3;
#ifdef __ARCH_SYNC_CORE_DCACHE
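	/* Ignore the CPU fingerprint bits in the high nibble when
	   checking that the whole RW_LOCK_BIAS was acquired. */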
	r2 = r1;
	r2 <<= 4;
	r2 >>= 4;
	cc = r2 == 0;
#else
	cc = r1 == 0;
#endif
	if !cc jump .Lwrlock_wait
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lwrlock_wait:
	r1 = p1;
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;
	call _get_core_lock;
	r1 = [p1];
#ifdef __ARCH_SYNC_CORE_DCACHE
	r1 <<= 4;
	r1 >>= 4;
#endif
	cc = r1 == r3;
	if !cc jump .Lwrlock_wait;
	jump .Lwrlock_try
ENDPROC(___raw_write_lock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_trylock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	r2.l = lo(RW_LOCK_BIAS);
	r2.h = hi(RW_LOCK_BIAS);
	cc = r1 == r2;
	if !cc jump .Lfailed_trywrlock;
#ifdef __ARCH_SYNC_CORE_DCACHE
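	/* Keep only the CPU fingerprint nibble; the count part of the
	   word becomes zero, i.e. write-locked. */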
	r1 >>= 28;
	r1 <<= 28;
#else
	r1 = 0;
#endif
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	r0 = 1;
	rts;

.Lfailed_trywrlock:
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	r0 = 0;
	rts;
ENDPROC(___raw_write_trylock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_unlock_asm)
	p1 = r0;
	r3.l = lo(RW_LOCK_BIAS);
	r3.h = hi(RW_LOCK_BIAS);
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	r1 = r1 + r3;
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _end_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;
ENDPROC(___raw_write_unlock_asm)

/*
 * r0 = ptr
 * r1 = value
 *
 * ADD a signed value to a 32bit word and return the new value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_add_asm)
	p1 = r0;
	r3 = r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	r3 = r3 + r2;
	[p1] = r3;
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_add_asm)

/*
 * r0 = ptr
 * r1 = value
 *
 * ADD a signed value to a 32bit word and return the old value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_xadd_asm)
	p1 = r0;
	r3 = r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];	/* old value */
	r3 = r3 + r2;	/* new value */
	[p1] = r3;
	r3 = r2;	/* return the old value; r3 survives _put_core_lock */
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_xadd_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * AND the mask bits from a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_and_asm)
	p1 = r0;
	r3 = r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];	/* old value */
	r3 = r2 & r3;	/* new value */
	[p1] = r3;
	r3 = r2;	/* return the old value; r3 survives _put_core_lock */
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_and_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * OR the mask bits into a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_or_asm)
	p1 = r0;
	r3 = r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];	/* old value */
	r3 = r2 | r3;	/* new value */
	[p1] = r3;
	r3 = r2;	/* return the old value; r3 survives _put_core_lock */
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_or_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * XOR the mask bits with a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_xor_asm)
	p1 = r0;
	r3 = r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];	/* old value */
	r3 = r2 ^ r3;	/* new value */
	[p1] = r3;
	r3 = r2;	/* return the old value; r3 survives _put_core_lock */
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_xor_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * Perform a logical AND between the mask bits and a 32bit word, and
 * return the masked value. We need this on this architecture in
 * order to invalidate the local cache before testing.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_test_asm)
	p1 = r0;
	r3 = r1;
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;
	p0 = r1;
	/* flush core internal write buffer before invalidate dcache */
	CSYNC(r2);
	flushinv[p0];
	SSYNC(r2);
	r0 = [p1];
	r0 = r0 & r3;
	rts;
ENDPROC(___raw_atomic_test_asm)

/*
 * r0 = ptr
 * r1 = value
 *
 * Swap *ptr with value and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
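/*
 * "src" is the load of the old value and "dst" the store of the new
 * one, in the access width of the caller (byte, word or 32bit).
 */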
#define __do_xchg(src, dst) \
	p1 = r0; \
	r3 = r1; \
	[--sp] = rets; \
	call _get_core_lock; \
	r2 = src; \
	dst = r3; \
	r3 = r2; \
	r1 = p1; \
	call _put_core_lock; \
	r0 = r3; \
	rets = [sp++]; \
	rts;

ENTRY(___raw_xchg_1_asm)
	__do_xchg(b[p1] (z), b[p1])
ENDPROC(___raw_xchg_1_asm)

ENTRY(___raw_xchg_2_asm)
	__do_xchg(w[p1] (z), w[p1])
ENDPROC(___raw_xchg_2_asm)

ENTRY(___raw_xchg_4_asm)
	__do_xchg([p1], [p1])
ENDPROC(___raw_xchg_4_asm)

/*
 * r0 = ptr
 * r1 = new
 * r2 = old
 *
 * Swap *ptr with new if *ptr == old and return the previous *ptr
 * value atomically.
 *
 * Clobbers: r3:0, p1:0
 */
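/*
 * r4 holds "old" across the _get_core_lock call; it is saved and
 * restored here because only r3:0 may be clobbered.
 */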
#define __do_cmpxchg(src, dst) \
	[--sp] = rets; \
	[--sp] = r4; \
	p1 = r0; \
	r3 = r1; \
	r4 = r2; \
	call _get_core_lock; \
	r2 = src; \
	cc = r2 == r4; \
	if !cc jump 1f; \
	dst = r3; \
	1: r3 = r2; \
	r1 = p1; \
	call _put_core_lock; \
	r0 = r3; \
	r4 = [sp++]; \
	rets = [sp++]; \
	rts;

ENTRY(___raw_cmpxchg_1_asm)
	__do_cmpxchg(b[p1] (z), b[p1])
ENDPROC(___raw_cmpxchg_1_asm)

ENTRY(___raw_cmpxchg_2_asm)
	__do_cmpxchg(w[p1] (z), w[p1])
ENDPROC(___raw_cmpxchg_2_asm)

ENTRY(___raw_cmpxchg_4_asm)
	__do_cmpxchg([p1], [p1])
ENDPROC(___raw_cmpxchg_4_asm)

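/*
 * The bit operations below turn the bit number into a mask and tail
 * into the atomic mask routines above, which return the old 32bit
 * word; the test_and_* variants then extract the old bit from it.
 */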
/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Set a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_set_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_or_asm
ENDPROC(___raw_bit_set_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Clear a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_clear_asm)
	r2 = 1;
	r2 <<= r1;
	r1 = ~r2;
	jump ___raw_atomic_and_asm
ENDPROC(___raw_bit_clear_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Toggle a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_toggle_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_xor_asm
ENDPROC(___raw_bit_toggle_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-set a bit in a 32bit word and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_set_asm)
	[--sp] = rets;
	[--sp] = r1;
	call ___raw_bit_set_asm
	r1 = [sp++];
	r2 = 1;
	r2 <<= r1;
	r0 = r0 & r2;
	cc = r0 == 0;
	if cc jump 1f
	r0 = 1;
1:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_set_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-clear a bit in a 32bit word and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_clear_asm)
	[--sp] = rets;
	[--sp] = r1;
	call ___raw_bit_clear_asm
	r1 = [sp++];
	r2 = 1;
	r2 <<= r1;
	r0 = r0 & r2;
	cc = r0 == 0;
	if cc jump 1f
	r0 = 1;
1:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_clear_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-toggle a bit in a 32bit word,
 * and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_toggle_asm)
	[--sp] = rets;
	[--sp] = r1;
	call ___raw_bit_toggle_asm
	r1 = [sp++];
	r2 = 1;
	r2 <<= r1;
	r0 = r0 & r2;
	cc = r0 == 0;
	if cc jump 1f
	r0 = 1;
1:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_toggle_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test a bit in a 32bit word and return its value.
 * We need this on this architecture in order to invalidate
 * the local cache before testing.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_test_asm
ENDPROC(___raw_bit_test_asm)

/*
 * r0 = ptr
 *
 * Fetch and return an uncached 32bit value.
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_uncached_fetch_asm)
	p1 = r0;
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;
	p0 = r1;
	/* flush core internal write buffer before invalidate dcache */
	CSYNC(r2);
	flushinv[p0];
	SSYNC(r2);
	r0 = [p1];
	rts;
ENDPROC(___raw_uncached_fetch_asm)