Blame - src/opts/SkBitmapProcState_filter_neon.h - platform/external/skqp

2012-08-13 14:06:34 +0000

[diff] [blame]

/*

*

* Use of this source code is governed by a BSD-style license that can be

5

* found in the LICENSE file.

6

*/

7

commit-bot@chromium.org

2013-09-13 12:39:09 +0000

[diff] [blame]

8

#include <arm_neon.h>

digit@google.com

2012-08-13 14:06:34 +0000

[diff] [blame]

9

#include "SkColorPriv.h"

10

11

/*

commit-bot@chromium.org

2013-09-13 12:39:09 +0000

[diff] [blame]

12

* Filter_32_opaque

13

*

14

* There is no hard-n-fast rule that the filtering must produce

15

* exact results for the color components, but if the 4 incoming colors are

16

* all opaque, then the output color must also be opaque. Subsequent parts of

17

* the drawing pipeline may rely on this (e.g. which blitrow proc to use).

commit-bot@chromium.org

f71be96

2014-05-13 14:47:11 +0000

[diff] [blame]

18

*

digit@google.com

2012-08-13 14:06:34 +0000

[diff] [blame]

19

*/

commit-bot@chromium.org

f71be96

2014-05-13 14:47:11 +0000

[diff] [blame]

20

// Chrome on Android uses -Os so we need to force these inline. Otherwise

21

// calling the function in the inner loops will cause significant overhead on

22

// some platforms.

23

/*
 * Bilinear blend of four premultiplied 32-bit pixels using NEON.
 *
 *   x, y      horizontal / vertical blend weights; the arithmetic below uses
 *             them against the constant 16, so they are presumably 4-bit
 *             sub-pixel fractions -- confirm against callers.
 *   a00, a01  left and right pixel weighted by (16 - y)
 *   a10, a11  left and right pixel weighted by y
 *   dst       receives the single blended pixel
 *
 * Per 8-bit channel the stored value is
 *   (a00*(16-y)*(16-x) + a01*(16-y)*x + a10*y*(16-x) + a11*y*x) >> 8
 * The four weights sum to 256, so a channel that is 255 in all four inputs
 * stays 255 in the output.
 */
static SK_ALWAYS_INLINE void Filter_32_opaque_neon(unsigned x, unsigned y,
                                                   SkPMColor a00, SkPMColor a01,
                                                   SkPMColor a10, SkPMColor a11,
                                                   SkPMColor *dst) {
    // Vertical weights replicated across all eight 8-bit lanes.
    const uint8x8_t wy     = vdup_n_u8(y);
    const uint8x8_t wy_inv = vsub_u8(vmov_n_u8(16), wy);           // 16 - y

    // Pack each pixel pair into one 64-bit vector: lane 0 = left, lane 1 = right.
    const uint32x2_t pair0 = vset_lane_u32(a01, vdup_n_u32(a00), 1);  // [a01|a00]
    const uint32x2_t pair1 = vset_lane_u32(a11, vdup_n_u32(a10), 1);  // [a11|a10]

    // Apply the vertical weights while widening every channel to 16 bits.
    const uint16x8_t pair0_y = vmull_u8(vreinterpret_u8_u32(pair0), wy_inv);
    const uint16x8_t pair1_y = vmull_u8(vreinterpret_u8_u32(pair1), wy);

    // Horizontal weights replicated across four 16-bit lanes.
    const uint16x4_t wx     = vdup_n_u16(x);
    const uint16x4_t wx_inv = vsub_u16(vmov_n_u16(16), wx);        // 16 - x

    // High halves hold the right-hand pixels (weighted by x), low halves the
    // left-hand pixels (weighted by 16 - x); accumulate all four products.
    uint16x4_t acc = vmul_u16(vget_high_u16(pair0_y), wx);         // a01*(16-y)*x
    acc = vmla_u16(acc, vget_high_u16(pair1_y), wx);               // + a11*y*x
    acc = vmla_u16(acc, vget_low_u16(pair0_y), wx_inv);            // + a00*(16-y)*(16-x)
    acc = vmla_u16(acc, vget_low_u16(pair1_y), wx_inv);            // + a10*y*(16-x)

    // Drop the 8 fractional bits, narrowing back to bytes; only the low four
    // bytes (one pixel) are meaningful, the upper half is zero padding.
    const uint8x8_t packed = vshrn_n_u16(vcombine_u16(acc, vcreate_u16(0)), 8);
    vst1_lane_u32(dst, vreinterpret_u32_u8(packed), 0);
}

56

commit-bot@chromium.org

f71be96

2014-05-13 14:47:11 +0000

[diff] [blame]

57

static SK_ALWAYS_INLINE void Filter_32_alpha_neon(unsigned x, unsigned y,

58

SkPMColor a00, SkPMColor a01,

59

SkPMColor a10, SkPMColor a11,

60

SkPMColor *dst,

61

uint16_t scale) {

commit-bot@chromium.org

2013-09-13 12:39:09 +0000

[diff] [blame]

62

uint8x8_t vy, vconst16_8, v16_y, vres;

63

uint16x4_t vx, vconst16_16, v16_x, tmp, vscale;

64

uint32x2_t va0, va1;

65

uint16x8_t tmp1, tmp2;

rmistry@google.com

2012-08-23 18:09:54 +0000

[diff] [blame]

66

commit-bot@chromium.org

2013-09-13 12:39:09 +0000

[diff] [blame]

67

vy = vdup_n_u8(y); // duplicate y into vy

68

vconst16_8 = vmov_n_u8(16); // set up constant in vconst16_8

69

v16_y = vsub_u8(vconst16_8, vy); // v16_y = 16-y

rmistry@google.com

2012-08-23 18:09:54 +0000

[diff] [blame]

70

commit-bot@chromium.org

2013-09-13 12:39:09 +0000

[diff] [blame]

71

va0 = vdup_n_u32(a00); // duplicate a00

72

va1 = vdup_n_u32(a10); // duplicate a10

73

va0 = vset_lane_u32(a01, va0, 1); // set top to a01

74

va1 = vset_lane_u32(a11, va1, 1); // set top to a11

rmistry@google.com

2012-08-23 18:09:54 +0000

[diff] [blame]

75

commit-bot@chromium.org

2013-09-13 12:39:09 +0000

[diff] [blame]

76

tmp1 = vmull_u8(vreinterpret_u8_u32(va0), v16_y); // tmp1 = [a01|a00] * (16-y)

77

tmp2 = vmull_u8(vreinterpret_u8_u32(va1), vy); // tmp2 = [a11|a10] * y

rmistry@google.com

2012-08-23 18:09:54 +0000

[diff] [blame]

78

commit-bot@chromium.org

2013-09-13 12:39:09 +0000

[diff] [blame]

79

vx = vdup_n_u16(x); // duplicate x into vx

80

vconst16_16 = vmov_n_u16(16); // set up constant in vconst16_16

81

v16_x = vsub_u16(vconst16_16, vx); // v16_x = 16-x

82

83

tmp = vmul_u16(vget_high_u16(tmp1), vx); // tmp = a01 * x

84

tmp = vmla_u16(tmp, vget_high_u16(tmp2), vx); // tmp += a11 * x

85

tmp = vmla_u16(tmp, vget_low_u16(tmp1), v16_x); // tmp += a00 * (16-x)

86

tmp = vmla_u16(tmp, vget_low_u16(tmp2), v16_x); // tmp += a10 * (16-x)

87

88

vscale = vdup_n_u16(scale); // duplicate scale

89

tmp = vshr_n_u16(tmp, 8); // shift down result by 8

90

tmp = vmul_u16(tmp, vscale); // multiply result by scale

91

92

vres = vshrn_n_u16(vcombine_u16(tmp, vcreate_u16(0)), 8); // shift down result by 8

93

vst1_lane_u32(dst, vreinterpret_u32_u8(vres), 0); // store result

digit@google.com