/*
 * Copyright 2007-2008 Analog Devices Inc.
 *	Philippe Gerum <rpm@xenomai.org>
 *
 * Licensed under the GPL-2 or later.
 */

#include <linux/linkage.h>
#include <asm/blackfin.h>
#include <asm/cache.h>
#include <asm/asm-offsets.h>
#include <asm/rwlock.h>
#include <asm/cplb.h>

.text

.macro coreslot_loadaddr reg:req
	\reg\().l = _corelock;
	\reg\().h = _corelock;
.endm
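
/*
 * coreslot_loadaddr loads the 32-bit address of the shared _corelock
 * test-and-set slot into the requested P register, one 16-bit half at a
 * time (Blackfin has no single 32-bit immediate load).  Everything below
 * serializes cross-core accesses by spinning on this slot with TESTSET.
 */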

/*
 * r0 = address of atomic data to flush and invalidate (32bit).
 *
 * Clear interrupts and return the old mask.
 * We assume that no atomic data can span cachelines.
 *
 * Clobbers: r2:0, p0
 */
ENTRY(_get_core_lock)
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;
	cli r0;
	coreslot_loadaddr p0;
.Lretry_corelock:
	testset (p0);
	if cc jump .Ldone_corelock;
	SSYNC(r2);
	jump .Lretry_corelock
.Ldone_corelock:
	p0 = r1;
	CSYNC(r2);
	flushinv[p0];
	SSYNC(r2);
	rts;
ENDPROC(_get_core_lock)
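
/*
 * Roughly, _get_core_lock behaves like the C sketch below (helper names
 * are illustrative, not real APIs; IRQ and cache details are simplified):
 *
 *	unsigned long get_core_lock(void *atomic_data)
 *	{
 *		unsigned long flags = irq_save();
 *		while (!testset(&corelock))
 *			ssync();		// let the owner's release drain
 *		flushinv_line(atomic_data);	// drop any stale cached copy
 *		return flags;
 *	}
 */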

/*
 * r0 = address of atomic data in uncacheable memory region (32bit).
 *
 * Clear interrupts and return the old mask.
 *
 * Clobbers: r0, p0
 */
ENTRY(_get_core_lock_noflush)
	cli r0;
	coreslot_loadaddr p0;
.Lretry_corelock_noflush:
	testset (p0);
	if cc jump .Ldone_corelock_noflush;
	SSYNC(r2);
	jump .Lretry_corelock_noflush
.Ldone_corelock_noflush:
	rts;
ENDPROC(_get_core_lock_noflush)

/*
 * r0 = interrupt mask to restore.
 * r1 = address of atomic data to flush and invalidate (32bit).
 *
 * Interrupts are masked on entry (see _get_core_lock).
 * Clobbers: r2:0, p0
 */
ENTRY(_put_core_lock)
	/* Write-through cache assumed, so no flush needed here. */
	coreslot_loadaddr p0;
	r1 = 0;
	[p0] = r1;
	SSYNC(r2);
	sti r0;
	rts;
ENDPROC(_put_core_lock)

#ifdef __ARCH_SYNC_CORE_DCACHE

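/*
 * D-cache coherence bookkeeping.  _barrier_mask holds one bit per core:
 * ___raw_smp_mark_barrier_asm sets the bits of all *other* cores (leaving
 * the caller's own bit untouched), and ___raw_smp_check_barrier_asm later
 * tests the caller's bit; if another core marked it since the last check,
 * the local D-cache may be stale and is resynchronized via
 * _resync_core_dcache.  Both run under the core lock.  Roughly
 * (illustrative C, "cpu" being the current core):
 *
 *	mark:	barrier_mask |= ~(1 << cpu);
 *	check:	if (barrier_mask & (1 << cpu)) {
 *			barrier_mask &= ~(1 << cpu);
 *			resync_core_dcache();
 *		}
 */
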
ENTRY(___raw_smp_mark_barrier_asm)
	[--sp] = rets;
	[--sp] = ( r7:5 );
	[--sp] = r0;
	[--sp] = p1;
	[--sp] = p0;
	call _get_core_lock_noflush;

	/*
	 * Calculate current core mask
	 */
	GET_CPUID(p1, r7);
	r6 = 1;
	r6 <<= r7;

	/*
	 * Set bit of other cores in barrier mask. Don't change current core bit.
	 */
	p1.l = _barrier_mask;
	p1.h = _barrier_mask;
	r7 = [p1];
	r5 = r7 & r6;
	r7 = ~r6;
	cc = r5 == 0;
	if cc jump 1f;
	r7 = r7 | r6;
1:
	[p1] = r7;
	SSYNC(r2);

	call _put_core_lock;
	p0 = [sp++];
	p1 = [sp++];
	r0 = [sp++];
	( r7:5 ) = [sp++];
	rets = [sp++];
	rts;
ENDPROC(___raw_smp_mark_barrier_asm)

ENTRY(___raw_smp_check_barrier_asm)
	[--sp] = rets;
	[--sp] = ( r7:5 );
	[--sp] = r0;
	[--sp] = p1;
	[--sp] = p0;
	call _get_core_lock_noflush;

	/*
	 * Calculate current core mask
	 */
	GET_CPUID(p1, r7);
	r6 = 1;
	r6 <<= r7;

	/*
	 * Clear current core bit in barrier mask if it is set.
	 */
	p1.l = _barrier_mask;
	p1.h = _barrier_mask;
	r7 = [p1];
	r5 = r7 & r6;
	cc = r5 == 0;
	if cc jump 1f;
	r6 = ~r6;
	r7 = r7 & r6;
	[p1] = r7;
	SSYNC(r2);

	call _put_core_lock;

	/*
	 * Invalidate the entire D-cache of current core.
	 */
	sp += -12;
	call _resync_core_dcache
	sp += 12;
	jump 2f;
1:
	call _put_core_lock;
2:
	p0 = [sp++];
	p1 = [sp++];
	r0 = [sp++];
	( r7:5 ) = [sp++];
	rets = [sp++];
	rts;
ENDPROC(___raw_smp_check_barrier_asm)

/*
 * r0 = irqflags
 * r1 = address of atomic data
 *
 * Clobbers: r2:0, p1:0
 */
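/*
 * The coherent lock paths keep per-CPU "fingerprints" in the high nibble
 * (bits 31:28) of the lock word: _end_lock_coherent stamps the releasing
 * CPU's bit there, and _start_lock_coherent checks whether any *other*
 * CPU's bit is set, in which case the locally cached copy of the data the
 * lock protects may be stale.  Roughly (illustrative C):
 *
 *	owners = (*lock >> 28) & ~(1 << cpu);
 *	*lock &= 0x0fffffff;		// strip the fingerprints
 *	corelock = 0;			// release core lock, IRQs still off
 *	if (owners)
 *		resync_core_dcache();
 *	irq_restore(flags);
 */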
_start_lock_coherent:

	[--sp] = rets;
	[--sp] = ( r7:6 );
	r7 = r0;
	p1 = r1;

	/*
	 * Determine whether the atomic data was previously
	 * owned by another CPU (=r6).
	 */
	GET_CPUID(p0, r2);
	r1 = 1;
	r1 <<= r2;
	r2 = ~r1;

	r1 = [p1];
	r1 >>= 28; /* CPU fingerprints are stored in the high nibble. */
	r6 = r1 & r2;
	r1 = [p1];
	r1 <<= 4;
	r1 >>= 4;
	[p1] = r1;

	/*
	 * Release the core lock now, but keep IRQs disabled while we are
	 * performing the remaining housekeeping chores for the current CPU.
	 */
	coreslot_loadaddr p0;
	r1 = 0;
	[p0] = r1;

	/*
	 * If another CPU has owned the same atomic section before us,
	 * then our D-cached copy of the shared data protected by the
	 * current spin/write_lock may be obsolete.
	 */
	cc = r6 == 0;
	if cc jump .Lcache_synced

	/*
	 * Invalidate the entire D-cache of the current core.
	 */
	sp += -12;
	call _resync_core_dcache
	sp += 12;

.Lcache_synced:
	SSYNC(r2);
	sti r7;
	( r7:6 ) = [sp++];
	rets = [sp++];
	rts

/*
 * r0 = irqflags
 * r1 = address of atomic data
 *
 * Clobbers: r2:0, p1:0
 */
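/*
 * Stamp the current CPU's fingerprint bit (bit 28 + cpu) into the lock
 * word, then tail-call _put_core_lock to drop the core lock and restore
 * the saved IRQ mask.
 */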
_end_lock_coherent:

	p1 = r1;
	GET_CPUID(p0, r2);
	r2 += 28;
	r1 = 1;
	r1 <<= r2;
	r2 = [p1];
	r2 = r1 | r2;
	[p1] = r2;
	r1 = p1;
	jump _put_core_lock;

#endif /* __ARCH_SYNC_CORE_DCACHE */

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_is_locked_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r3 = [p1];
	cc = bittst( r3, 0 );
	r3 = cc;
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	r0 = r3;
	rts;
ENDPROC(___raw_spin_is_locked_asm)

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_lock_asm)
	p1 = r0;
	[--sp] = rets;
.Lretry_spinlock:
	call _get_core_lock;
	r1 = p1;
	r2 = [p1];
	cc = bittst( r2, 0 );
	if cc jump .Lbusy_spinlock
#ifdef __ARCH_SYNC_CORE_DCACHE
	r3 = p1;
	bitset ( r2, 0 ); /* Raise the lock bit. */
	[p1] = r2;
	call _start_lock_coherent
#else
	r2 = 1;
	[p1] = r2;
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lbusy_spinlock:
	/* We don't touch the atomic area if busy, so that the flush
	   behaves like a nop in _put_core_lock. */
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;
	jump .Lretry_spinlock
ENDPROC(___raw_spin_lock_asm)
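
/*
 * A rough C view of ___raw_spin_lock_asm (illustrative only; every step
 * runs with the core lock held, which is what provides atomicity):
 *
 *	for (;;) {
 *		flags = get_core_lock(&lock->lock);
 *		if (!(lock->lock & 1)) {		// bit 0 == "locked"
 *			lock->lock |= 1;
 *			start_lock_coherent(flags, &lock->lock);  // or put_core_lock()
 *			return;
 *		}
 *		put_core_lock(flags, &lock->lock);	// busy: back off and retry
 *	}
 */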

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_trylock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = p1;
	r3 = [p1];
	cc = bittst( r3, 0 );
	if cc jump .Lfailed_trylock
#ifdef __ARCH_SYNC_CORE_DCACHE
	bitset ( r3, 0 ); /* Raise the lock bit. */
	[p1] = r3;
	call _start_lock_coherent
#else
	r2 = 1;
	[p1] = r2;
	call _put_core_lock;
#endif
	r0 = 1;
	rets = [sp++];
	rts;
.Lfailed_trylock:
	call _put_core_lock;
	r0 = 0;
	rets = [sp++];
	rts;
ENDPROC(___raw_spin_trylock_asm)

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_spin_unlock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	bitclr ( r2, 0 );
	[p1] = r2;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _end_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;
ENDPROC(___raw_spin_unlock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_read_lock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
.Lrdlock_try:
	r1 = [p1];
	r1 += -1;
	[p1] = r1;
	cc = r1 < 0;
	if cc jump .Lrdlock_failed
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lrdlock_failed:
	r1 += 1;
	[p1] = r1;
.Lrdlock_wait:
	r1 = p1;
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;
	call _get_core_lock;
	r1 = [p1];
	cc = r1 < 2;
	if cc jump .Lrdlock_wait;
	jump .Lrdlock_try
ENDPROC(___raw_read_lock_asm)
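
/*
 * The rwlock word follows the usual RW_LOCK_BIAS convention: it starts at
 * RW_LOCK_BIAS, each reader decrements it by one, and a writer subtracts
 * the whole bias, so a negative result on the reader path means a writer
 * holds (or is taking) the lock.  Roughly (illustrative C):
 *
 *	flags = get_core_lock(&rw->lock);
 *	while (--rw->lock < 0) {
 *		rw->lock++;			// undo, then poll
 *		do {
 *			put_core_lock(flags, &rw->lock);
 *			flags = get_core_lock(&rw->lock);
 *		} while (rw->lock < 2);
 *	}
 *	start_lock_coherent(flags, &rw->lock);	// or put_core_lock()
 */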

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_read_trylock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	cc = r1 <= 0;
	if cc jump .Lfailed_tryrdlock;
	r1 += -1;
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	r0 = 1;
	rts;
.Lfailed_tryrdlock:
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	r0 = 0;
	rts;
ENDPROC(___raw_read_trylock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Note: code running under a reader lock cannot have modified the
 * protected data, so there is no cache state to publish to the other
 * core; we simply release the core lock and exit (no _end_lock_coherent).
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_read_unlock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	r1 += 1;
	[p1] = r1;
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	rts;
ENDPROC(___raw_read_unlock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_lock_asm)
	p1 = r0;
	r3.l = lo(RW_LOCK_BIAS);
	r3.h = hi(RW_LOCK_BIAS);
	[--sp] = rets;
	call _get_core_lock;
.Lwrlock_try:
	r1 = [p1];
	r1 = r1 - r3;
#ifdef __ARCH_SYNC_CORE_DCACHE
	r2 = r1;
	r2 <<= 4;
	r2 >>= 4;
	cc = r2 == 0;
#else
	cc = r1 == 0;
#endif
	if !cc jump .Lwrlock_wait
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lwrlock_wait:
	r1 = p1;
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;
	call _get_core_lock;
	r1 = [p1];
#ifdef __ARCH_SYNC_CORE_DCACHE
	r1 <<= 4;
	r1 >>= 4;
#endif
	cc = r1 == r3;
	if !cc jump .Lwrlock_wait;
	jump .Lwrlock_try
ENDPROC(___raw_write_lock_asm)
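
/*
 * The writer path takes the lock by subtracting RW_LOCK_BIAS; the result
 * is zero only if no reader or writer is in.  With the D-cache coherence
 * scheme enabled, the fingerprint nibble is masked out of the comparison
 * (and preserved across the store) so old fingerprints don't make the
 * lock look busy.  Roughly (illustrative C, coherent variant):
 *
 *	flags = get_core_lock(&rw->lock);
 *	while ((rw->lock - RW_LOCK_BIAS) & 0x0fffffff) {
 *		put_core_lock(flags, &rw->lock);	// busy: drop and poll
 *		flags = get_core_lock(&rw->lock);
 *	}
 *	rw->lock -= RW_LOCK_BIAS;
 *	start_lock_coherent(flags, &rw->lock);
 */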

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_trylock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	r2.l = lo(RW_LOCK_BIAS);
	r2.h = hi(RW_LOCK_BIAS);
	cc = r1 == r2;
	if !cc jump .Lfailed_trywrlock;
#ifdef __ARCH_SYNC_CORE_DCACHE
	r1 >>= 28;
	r1 <<= 28;
#else
	r1 = 0;
#endif
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	r0 = 1;
	rts;

.Lfailed_trywrlock:
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	r0 = 0;
	rts;
ENDPROC(___raw_write_trylock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_unlock_asm)
	p1 = r0;
	r3.l = lo(RW_LOCK_BIAS);
	r3.h = hi(RW_LOCK_BIAS);
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	r1 = r1 + r3;
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _end_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;
ENDPROC(___raw_write_unlock_asm)

/*
 * r0 = ptr
 * r1 = value
 *
 * Add a signed value to a 32bit word and return the new value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_update_asm)
	p1 = r0;
	r3 = r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	r3 = r3 + r2;
	[p1] = r3;
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_update_asm)
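
/*
 * The ___raw_atomic_*_asm helpers all follow the same shape: take the core
 * lock (which also flushes/invalidates the cache line holding the word),
 * do the read-modify-write, drop the core lock, and return either the new
 * value (update) or the old one (clear/set/xor).  ___raw_atomic_update_asm
 * is roughly:
 *
 *	int atomic_update(volatile int *p, int delta)
 *	{
 *		unsigned long flags = get_core_lock(p);
 *		int new = *p + delta;
 *		*p = new;
 *		put_core_lock(flags, p);
 *		return new;
 *	}
 */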

/*
 * r0 = ptr
 * r1 = mask
 *
 * Clear the mask bits from a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_clear_asm)
	p1 = r0;
	r3 = ~r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	r3 = r2 & r3;
	[p1] = r3;
	r3 = r2;
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_clear_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * Set the mask bits into a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_set_asm)
	p1 = r0;
	r3 = r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	r3 = r2 | r3;
	[p1] = r3;
	r3 = r2;
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_set_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * XOR the mask bits with a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_xor_asm)
	p1 = r0;
	r3 = r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	r3 = r2 ^ r3;
	[p1] = r3;
	r3 = r2;
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_xor_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * Perform a logical AND between the mask bits and a 32bit word, and
 * return the masked value. We need this on this architecture in
 * order to invalidate the local cache before testing.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_test_asm)
	p1 = r0;
	r3 = r1;
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;
	p0 = r1;
	flushinv[p0];
	SSYNC(r2);
	r0 = [p1];
	r0 = r0 & r3;
	rts;
ENDPROC(___raw_atomic_test_asm)

/*
 * r0 = ptr
 * r1 = value
 *
 * Swap *ptr with value and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
#define __do_xchg(src, dst) \
	p1 = r0; \
	r3 = r1; \
	[--sp] = rets; \
	call _get_core_lock; \
	r2 = src; \
	dst = r3; \
	r3 = r2; \
	r1 = p1; \
	call _put_core_lock; \
	r0 = r3; \
	rets = [sp++]; \
	rts;
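
/*
 * __do_xchg expands into the three xchg entry points below; only the
 * load/store width differs.  Each is roughly (illustrative C):
 *
 *	T xchg(volatile T *p, T val)		// T = u8, u16 or u32
 *	{
 *		unsigned long flags = get_core_lock(p);
 *		T old = *p;
 *		*p = val;
 *		put_core_lock(flags, p);
 *		return old;
 *	}
 */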

ENTRY(___raw_xchg_1_asm)
	__do_xchg(b[p1] (z), b[p1])
ENDPROC(___raw_xchg_1_asm)

ENTRY(___raw_xchg_2_asm)
	__do_xchg(w[p1] (z), w[p1])
ENDPROC(___raw_xchg_2_asm)

ENTRY(___raw_xchg_4_asm)
	__do_xchg([p1], [p1])
ENDPROC(___raw_xchg_4_asm)

/*
 * r0 = ptr
 * r1 = new
 * r2 = old
 *
 * Swap *ptr with new if *ptr == old and return the previous *ptr
 * value atomically.
 *
 * Clobbers: r3:0, p1:0
 */
#define __do_cmpxchg(src, dst) \
	[--sp] = rets; \
	[--sp] = r4; \
	p1 = r0; \
	r3 = r1; \
	r4 = r2; \
	call _get_core_lock; \
	r2 = src; \
	cc = r2 == r4; \
	if !cc jump 1f; \
	dst = r3; \
1:	r3 = r2; \
	r1 = p1; \
	call _put_core_lock; \
	r0 = r3; \
	r4 = [sp++]; \
	rets = [sp++]; \
	rts;

ENTRY(___raw_cmpxchg_1_asm)
	__do_cmpxchg(b[p1] (z), b[p1])
ENDPROC(___raw_cmpxchg_1_asm)

ENTRY(___raw_cmpxchg_2_asm)
	__do_cmpxchg(w[p1] (z), w[p1])
ENDPROC(___raw_cmpxchg_2_asm)

ENTRY(___raw_cmpxchg_4_asm)
	__do_cmpxchg([p1], [p1])
ENDPROC(___raw_cmpxchg_4_asm)
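
/*
 * Likewise, __do_cmpxchg is the classic compare-and-swap, implemented
 * under the core lock rather than with a hardware primitive.  Note the
 * argument order matches the register interface (ptr, new, old):
 *
 *	T cmpxchg(volatile T *p, T new, T old)
 *	{
 *		unsigned long flags = get_core_lock(p);
 *		T prev = *p;
 *		if (prev == old)
 *			*p = new;
 *		put_core_lock(flags, p);
 *		return prev;
 *	}
 */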

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Set a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_set_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_set_asm
ENDPROC(___raw_bit_set_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Clear a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_clear_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_clear_asm
ENDPROC(___raw_bit_clear_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Toggle a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_toggle_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_xor_asm
ENDPROC(___raw_bit_toggle_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-set a bit in a 32bit word and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
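/*
 * The three test-and-modify helpers below share one pattern: call the
 * word-wide atomic (which returns the old 32-bit value), then reduce that
 * old value to the single requested bit, i.e. roughly
 *
 *	old = ___raw_bit_set_asm(ptr, nr);	// or _clear / _toggle
 *	return (old & (1 << nr)) != 0;
 */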
ENTRY(___raw_bit_test_set_asm)
	[--sp] = rets;
	[--sp] = r1;
	call ___raw_bit_set_asm
	r1 = [sp++];
	r2 = 1;
	r2 <<= r1;
	r0 = r0 & r2;
	cc = r0 == 0;
	if cc jump 1f
	r0 = 1;
1:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_set_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-clear a bit in a 32bit word and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_clear_asm)
	[--sp] = rets;
	[--sp] = r1;
	call ___raw_bit_clear_asm
	r1 = [sp++];
	r2 = 1;
	r2 <<= r1;
	r0 = r0 & r2;
	cc = r0 == 0;
	if cc jump 1f
	r0 = 1;
1:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_clear_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-toggle a bit in a 32bit word,
 * and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_toggle_asm)
	[--sp] = rets;
	[--sp] = r1;
	call ___raw_bit_toggle_asm
	r1 = [sp++];
	r2 = 1;
	r2 <<= r1;
	r0 = r0 & r2;
	cc = r0 == 0;
	if cc jump 1f
	r0 = 1;
1:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_toggle_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test a bit in a 32bit word and return its value.
 * We need this on this architecture in order to invalidate
 * the local cache before testing.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_test_asm
ENDPROC(___raw_bit_test_asm)

/*
 * r0 = ptr
 *
 * Fetch and return an uncached 32bit value.
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_uncached_fetch_asm)
	p1 = r0;
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;
	p0 = r1;
	flushinv[p0];
	SSYNC(r2);
	r0 = [p1];
	rts;
ENDPROC(___raw_uncached_fetch_asm)