Blame - libpixelflinger/t32cb16blend.S - platform/system/core

blob: d4b257981aa32b07419725000c72e3be8cbc86c8 [file] [log] [blame]

The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame^]	1	/* libs/pixelflinger/t32cb16blend.S
				2	**
				3	** Copyright 2006, The Android Open Source Project
				4	**
				5	** Licensed under the Apache License, Version 2.0 (the "License");
				6	** you may not use this file except in compliance with the License.
				7	** You may obtain a copy of the License at
				8	**
				9	** http://www.apache.org/licenses/LICENSE-2.0
				10	**
				11	** Unless required by applicable law or agreed to in writing, software
				12	** distributed under the License is distributed on an "AS IS" BASIS,
				13	** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	** See the License for the specific language governing permissions and
				15	** limitations under the License.
				16	*/
				17	// Section setup: the two directives below place the code that follows in the .text section and align it.
				18	// NOTE(review): .align is given no explicit argument; confirm the assembler default yields the 4-byte alignment ARM instructions need.
				19	.text
				20	.align
				21	// Export the routine's entry label so that other object files can link against it.
				22	.global scanline_t32cb16blend_arm
				23
				24	// pixel macro: blends one 32-bit source pixel into one 5-6-5 destination pixel; uses (clobbers) r6, r7, lr
				25	// DREG: register holding the destination pixel(s); SRC: source pixel; FB: result register; OFFSET: bit position of the pixel inside DREG and FB
				26	.macro pixel, DREG, SRC, FB, OFFSET
				27	// OFFSET is 0 or 16 at every use in this file; the non-zero variant only ORs into FB, so FB must already hold the OFFSET 0 result
				28	// SRC = AARRGGBB per the original note. NOTE(review): the code below takes the field it labels red from bits 7..3 and blue from bits 23..19 (alpha 31..24, blue 23..16, green 15..8, red 7..0) - confirm the intended byte order
				29	mov r7, \SRC, lsr #24 // r7 = sA, the 8-bit source alpha (0..255)
				30	add r7, r7, r7, lsr #7 // sA + (sA >> 7): stretches 0..255 to 0..256 so that 255 becomes exactly 256
				31	rsb r7, r7, #0x100 // r7 = 0x100 - (sA+(sA>>7)): weight applied to the destination fields, 0..256
				32	// NOTE(review): none of the sums below is clamped; presumably SRC is premultiplied by alpha so a field cannot overflow into its neighbour - confirm with callers
				33	1: // NOTE(review): no 1b/1f reference to this numeric label is visible in this file
				34
				35	.if \OFFSET
				36	// OFFSET != 0 variant: the pixel sits in the upper part of DREG and every field is ORed into FB
				37	// red: field at bit OFFSET+11 = sR5 + ((dR * r7) >> 8)
				38	mov lr, \DREG, lsr #(\OFFSET + 6 + 5) // lr = dR; not masked: with OFFSET 16 the shift by 27 leaves only the 5 red bits
				39	smulbb lr, r7, lr // lr = r7 * dR (low halfword times low halfword)
				40	mov r6, \SRC, lsr #3
				41	and r6, r6, #0x1F // r6 = sR5, bits 7..3 of SRC
				42	add lr, r6, lr, lsr #8 // lr = sR5 + ((r7 * dR) >> 8)
				43	orr \FB, lr, lsl #(\OFFSET + 11) // two-operand form: FB = FB | (lr << (OFFSET + 11))
				44
				45	// green: the 6-bit field is masked in place, so the product carries 5 extra position bits
				46	and r6, \DREG, #(0x3F<<(\OFFSET + 5)) // r6 = dG, still at its bit position in DREG
				47	smulbt r6, r7, r6 // r6 = (low halfword of r7) * (top halfword of r6); with OFFSET 16 the top halfword is dG << 5
				48	mov lr, \SRC, lsr #(8+2)
				49	and lr, lr, #0x3F // lr = sG6, bits 15..10 of SRC
				50	add r6, lr, r6, lsr #(5+8) // r6 = sG6 + ((r7 * dG) >> 8); the extra shift by 5 removes the position bits
				51	orr \FB, \FB, r6, lsl #(\OFFSET + 5) // FB |= green field
				52
				53	// blue: field at bit OFFSET = sB5 + ((dB * r7) >> 8)
				54	and lr, \DREG, #(0x1F << \OFFSET) // lr = dB, still at its bit position in DREG
				55	smulbt lr, r7, lr // lr = (low halfword of r7) * (top halfword of lr); with OFFSET 16 the top halfword is dB
				56	mov r6, \SRC, lsr #(8+8+3)
				57	and r6, r6, #0x1F // r6 = sB5, bits 23..19 of SRC
				58	add lr, r6, lr, lsr #8 // lr = sB5 + ((r7 * dB) >> 8)
				59	orr \FB, \FB, lr, lsl #\OFFSET // FB |= blue field
				60
				61	.else
				62	// OFFSET == 0 variant: the pixel is the low halfword of DREG; the first write to FB is a mov, so FB needs no initial value
				63	// red: bits 15..11 = sR5 + ((dR * r7) >> 8)
				64	mov lr, \DREG, lsr #(6+5)
				65	and lr, lr, #0x1F // lr = dR; the mask is needed because DREG can hold a second pixel above bit 15 (the main loop loads 32 bits)
				66	smulbb lr, r7, lr // lr = r7 * dR
				67	mov r6, \SRC, lsr #3
				68	and r6, r6, #0x1F // r6 = sR5, bits 7..3 of SRC
				69	add lr, r6, lr, lsr #8 // lr = sR5 + ((r7 * dR) >> 8)
				70	mov \FB, lr, lsl #11 // FB = red field only; any previous content of FB is discarded
				71
				72	// green: bits 10..5 = sG6 + ((dG * r7) >> 8)
				73	and r6, \DREG, #(0x3F<<5) // r6 = dG << 5 (masked in place)
				74	smulbb r6, r7, r6 // r6 = r7 * (dG << 5)
				75	mov lr, \SRC, lsr #(8+2)
				76	and lr, lr, #0x3F // lr = sG6, bits 15..10 of SRC
				77	add r6, lr, r6, lsr #(5+8) // r6 = sG6 + ((r7 * dG) >> 8); the extra shift by 5 removes the position bits
				78	orr \FB, \FB, r6, lsl #5 // FB |= green field
				79
				80	// blue: bits 4..0 = sB5 + ((dB * r7) >> 8)
				81	and lr, \DREG, #0x1F // lr = dB
				82	smulbb lr, r7, lr // lr = r7 * dB
				83	mov r6, \SRC, lsr #(8+8+3)
				84	and r6, r6, #0x1F // r6 = sB5, bits 23..19 of SRC
				85	add lr, r6, lr, lsr #8 // lr = sB5 + ((r7 * dB) >> 8)
				86	orr \FB, \FB, lr // FB |= blue field
				87
				88	.endif
				89
				90	.endm
				91
				92
				93	// r0: dst ptr (16-bit pixels; advanced as pixels are written)
				94	// r1: src ptr (32-bit pixels; advanced as pixels are read)
				95	// r2: count (pixels left; kept biased by the subs/adds in the loops below)
				96	// r3: d (destination pixel or pixel pair; also scratch for the all-zero test)
				97	// r4: s0 (first source pixel of a pair, or the single source pixel at last)
				98	// r5: s1 (second source pixel of a pair)
				99	// r6: pixel (scratch inside the pixel macro)
				100	// r7: pixel (destination weight inside the pixel macro)
				101	// r8: free (not touched by this routine)
				102	// r9: free (not touched by this routine)
				103	// r10: free (not touched by this routine)
				104	// r11: free (not touched by this routine)
				105	// r12: scratch (receives the blended result that is stored to dst)
				106	// r14: pixel (lr, scratch inside the pixel macro; saved on entry and restored before returning)
				107	// Blends count 32-bit source pixels from r1 onto the 16-bit pixels at r0, two at a time once dst is word-aligned.
				108	scanline_t32cb16blend_arm:
				109	stmfd sp!, {r4-r7, lr} // save r4-r7 and lr: the code below and the pixel macro overwrite them
				110
				111	pld [r0] // prefetch the start of dst
				112	pld [r1] // prefetch the start of src
				113
				114	// align DST to 32 bits: when dst is not word-aligned, blend a single pixel first (assumes dst is 2-byte aligned - TODO confirm)
				115	tst r0, #0x3
				116	beq aligned
				117	subs r2, r2, #1 // account for the leading pixel; a borrow (lo) means count was 0
				118	ldmlofd sp!, {r4-r7, lr} // count was 0: restore the saved registers and return
				119	bxlo lr
				120	// Single-pixel path: used for the leading unaligned pixel and, via the branch at the end, for a trailing odd pixel.
				121	last:
				122	ldr r4, [r1], #4 // r4 = next source pixel, src += 4
				123	ldrh r3, [r0] // r3 = destination pixel (16 bits, zero-extended)
				124	pixel r3, r4, r12, 0 // r12 = blended 16-bit pixel
				125	strh r12, [r0], #2 // store it, dst += 2
				126	// Pair path: reached with dst word-aligned, or after the single trailing pixel (r2 is then 0 and the test below exits).
				127	aligned:
				128	subs r2, r2, #2 // bias count by -2; a borrow (lo) means fewer than 2 pixels are left
				129	blo 9f
				130
				131	// The main loop is unrolled twice and processes 4 pixels per pass (2 per half)
				132	8: ldmia r1!, {r4, r5} // r4 = s0, r5 = s1, src += 8
				133	// stream the source
				134	pld [r1, #32]
				135	add r0, r0, #4 // advance dst past the pair now; the pair is addressed as [r0, #-4] below
				136	// both source pixels are all zero: skip this pair and leave dst untouched
				137	orrs r3, r4, r5
				138	beq 7f
				139
				140	// load the destination (two 16-bit pixels as one word)
				141	ldr r3, [r0, #-4]
				142	// stream the destination
				143	pld [r0, #32]
				144	pixel r3, r4, r12, 0 // low halfword of dst blended with s0; this invocation initialises r12
				145	pixel r3, r5, r12, 16 // high halfword of dst blended with s1; ORed into r12
				146	// effectively, we're getting write-combining by virtue of the
				147	// cpu's write-back cache.
				148	str r12, [r0, #-4]
				149
				150	// 2nd iteration of the loop, do not stream anything (same work as above without the pld hints)
				151	subs r2, r2, #2 // 2 more pixels consumed; negative means fewer than 2 are left
				152	movlt r4, r5 // NOTE(review): r4 is reloaded at last or restored from the stack before it is read again, so this move looks redundant
				153	blt 9f
				154	ldmia r1!, {r4, r5} // r4 = s0, r5 = s1, src += 8
				155	add r0, r0, #4 // dst past the pair; addressed as [r0, #-4] below
				156	orrs r3, r4, r5 // both source pixels zero: skip the pair
				157	beq 7f
				158	ldr r3, [r0, #-4] // r3 = two destination pixels
				159	pixel r3, r4, r12, 0 // low halfword with s0; initialises r12
				160	pixel r3, r5, r12, 16 // high halfword with s1; ORed into r12
				161	str r12, [r0, #-4]
				162
				163	// Loop control: continue while at least 2 pixels remain.
				164	7: subs r2, r2, #2
				165	bhs 8b
				166	mov r4, r5 // NOTE(review): r4 is reloaded at last or restored from the stack before it is read again, so this move looks redundant
				167	// Tail: on every path into 9 visible here r2 is -2 (nothing left) or -1 (one pixel left); adding 1 sets the carry only for -1.
				168	9: adds r2, r2, #1
				169	ldmlofd sp!, {r4-r7, lr} // no carry (lo): nothing left, restore the saved registers and return
				170	bxlo lr
				171	b last // one pixel left: blend it at last with r2 = 0, so the following pass through aligned and 9 returns