// blob: d4b257981aa32b07419725000c72e3be8cbc86c8
// blame: The Android Open Source Project dd7bc33 2009-03-03 19:32:55 -0800
/* libs/pixelflinger/t32cb16blend.S
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/


// Assemble into the code section and export the scanline entry point.
    .text
    .align

    .global scanline_t32cb16blend_arm

// pixel DREG, SRC, FB, OFFSET
//
// Blends one 32-bit source pixel over one RGB565 destination pixel.
// For each channel:
//
//     out = src_channel + ((dst_channel * f) >> 8)
//     f   = 0x100 - (sA + (sA >> 7))          // sA = SRC[31:24]
//
// The source channel is added without being scaled by alpha, so the source
// is presumably alpha-premultiplied -- confirm against the callers.
// No clamping is performed: a sum wider than its 5/6-bit field spills into
// the neighbouring bits of FB.
//
//   DREG    register holding the destination pixel; the RGB565 value used is
//           the 16 bits starting at bit \OFFSET
//   SRC     register holding the source pixel, laid out as extracted below:
//           [31:24] alpha, [23:16] blue, [15:8] green, [7:0] red
//   FB      output register
//             OFFSET == 0 : FB is overwritten with the blended pixel
//             OFFSET != 0 : the blended pixel is OR-ed into FB at bit
//                           \OFFSET, so those bits of FB must already be
//                           zero (invoke with OFFSET 0 on the same FB first)
//   OFFSET  bit position of the pixel inside DREG/FB (0 or 16 at the call
//           sites in this file)
//
// Clobbers r6, r7 and lr.  None of the instructions below updates the
// condition flags.  Needs the smulbb/smulbt halfword multiplies.

.macro pixel,   DREG, SRC, FB, OFFSET

    // SRC = AABBGGRR
    mov     r7, \SRC, lsr #24           // sA
    add     r7, r7, r7, lsr #7          // sA + (sA >> 7): 255 becomes 256
    rsb     r7, r7, #0x100              // f = 0x100 - (sA + (sA >> 7))

    // numeric label kept from the original; nothing visible branches to it
1:

.if \OFFSET

    // red (top 5 bits of the selected pixel; the shift leaves only them)
    mov     lr, \DREG, lsr #(\OFFSET + 6 + 5)
    smulbb  lr, r7, lr
    mov     r6, \SRC, lsr #3
    and     r6, r6, #0x1F
    add     lr, r6, lr, lsr #8
    orr     \FB, \FB, lr, lsl #(\OFFSET + 11)

    // green (kept in place; smulbt reads it from the top halfword as G << 5)
    and     r6, \DREG, #(0x3F<<(\OFFSET + 5))
    smulbt  r6, r7, r6
    mov     lr, \SRC, lsr #(8+2)
    and     lr, lr, #0x3F
    add     r6, lr, r6, lsr #(5+8)
    orr     \FB, \FB, r6, lsl #(\OFFSET + 5)

    // blue (kept in place; smulbt reads it from the top halfword)
    and     lr, \DREG, #(0x1F << \OFFSET)
    smulbt  lr, r7, lr
    mov     r6, \SRC, lsr #(8+8+3)
    and     r6, r6, #0x1F
    add     lr, r6, lr, lsr #8
    orr     \FB, \FB, lr, lsl #\OFFSET

.else

    // red (masked: DREG may carry another pixel in its upper half)
    mov     lr, \DREG, lsr #(6+5)
    and     lr, lr, #0x1F
    smulbb  lr, r7, lr
    mov     r6, \SRC, lsr #3
    and     r6, r6, #0x1F
    add     lr, r6, lr, lsr #8
    mov     \FB, lr, lsl #11            // first write: initialises FB

    // green (multiplied as G << 5, shifted back down afterwards)
    and     r6, \DREG, #(0x3F<<5)
    smulbb  r6, r7, r6
    mov     lr, \SRC, lsr #(8+2)
    and     lr, lr, #0x3F
    add     r6, lr, r6, lsr #(5+8)
    orr     \FB, \FB, r6, lsl #5

    // blue
    and     lr, \DREG, #0x1F
    smulbb  lr, r7, lr
    mov     r6, \SRC, lsr #(8+8+3)
    and     r6, r6, #0x1F
    add     lr, r6, lr, lsr #8
    orr     \FB, \FB, lr

.endif

    .endm


// scanline_t32cb16blend_arm
//
// Blends a run of 32-bit source pixels onto a run of 16-bit destination
// pixels, one source pixel per destination pixel, using the pixel macro.
//
// In:
//   r0: dst ptr  (16-bit pixels; accessed with ldrh/strh until it is
//                 32-bit aligned, then with ldr/str two pixels at a time)
//   r1: src ptr  (32-bit pixels; read with ldr/ldmia)
//   r2: count    (number of pixels; 0 returns without touching memory)
//
// Register use:
//   r3:  destination pixel(s) loaded from dst
//   r4:  source pixel 0
//   r5:  source pixel 1
//   r6:  pixel macro temporary
//   r7:  pixel macro temporary (alpha factor)
//   r12: blended output pixel(s)
//   r14: pixel macro temporary
//   r8-r11: not used
//
// r4-r7 and lr are saved on entry and restored before returning.

scanline_t32cb16blend_arm:
    stmfd   sp!, {r4-r7, lr}

    pld     [r0]
    pld     [r1]

    // align DST to 32 bits
    tst     r0, #0x3
    beq     aligned
    subs    r2, r2, #1
    ldmlofd sp!, {r4-r7, lr}        // count was 0: return
    bxlo    lr

    // Blend exactly one pixel: either the leading pixel needed to align
    // dst, or the trailing odd pixel (entered from 9: below).
last:
    ldr     r4, [r1], #4
    ldrh    r3, [r0]
    pixel   r3, r4, r12, 0
    strh    r12, [r0], #2

    // From here r2 = (pixels remaining) - 2 after each subs; a borrow
    // (lo) means fewer than two pixels are left.
aligned:
    subs    r2, r2, #2
    blo     9f

    // The main loop is unrolled twice and processes 4 pixels per pass
8:  ldmia   r1!, {r4, r5}
    // stream the source
    pld     [r1, #32]
    add     r0, r0, #4
    // both source pixels are all zero: destination is unchanged, skip them
    orrs    r3, r4, r5
    beq     7f

    // load the destination
    ldr     r3, [r0, #-4]
    // stream the destination
    pld     [r0, #32]
    pixel   r3, r4, r12, 0
    pixel   r3, r5, r12, 16
    // effectively, we're getting write-combining by virtue of the
    // cpu's write-back cache.
    str     r12, [r0, #-4]

    // 2nd iteration of the loop, don't stream anything
    subs    r2, r2, #2
    blo     9f
    ldmia   r1!, {r4, r5}
    add     r0, r0, #4
    orrs    r3, r4, r5
    beq     7f
    ldr     r3, [r0, #-4]
    pixel   r3, r4, r12, 0
    pixel   r3, r5, r12, 16
    str     r12, [r0, #-4]


7:  subs    r2, r2, #2
    bhs     8b

    // r2 is now -2 (nothing left) or -1 (one pixel left).  Adding 1 carries
    // out only for -1, so "lo" (carry clear) means we are done.
9:  adds    r2, r2, #1
    ldmlofd sp!, {r4-r7, lr}        // return
    bxlo    lr
    b       last
171 b last