Geert Uytterhoeven | 2cd1de0 | 2009-01-04 11:58:20 +0100 | [diff] [blame] | 1 | /* |
| 2 | * Fast C2P (Chunky-to-Planar) Conversion |
| 3 | * |
| 4 | * Copyright (C) 2003-2008 Geert Uytterhoeven |
| 5 | * |
| 6 | * NOTES: |
| 7 | * - This code was inspired by Scout's C2P tutorial |
| 8 | * - It assumes that it runs on a big-endian system |
| 9 | * |
| 10 | * This file is subject to the terms and conditions of the GNU General Public |
| 11 | * License. See the file COPYING in the main directory of this archive |
| 12 | * for more details. |
| 13 | */ |
| 14 | |
| 15 | |
/*
 * Basic transpose step
 *
 * For i1 != i2, swaps the bits of d[i1] selected by @mask with the bits of
 * d[i2] located @shift positions higher, using the XOR-swap technique so
 * that only one temporary is needed.
 *
 * @d:     array of words, updated in place
 * @i1:    index of the word whose @mask bits are exchanged
 * @i2:    index of the word whose (@mask << @shift) bits are exchanged
 * @shift: distance in bits between the two exchanged bit groups
 * @mask:  selects the bits of d[i1] that take part in the exchange
 */

static inline void _transp(u32 d[], unsigned int i1, unsigned int i2,
			   unsigned int shift, u32 mask)
{
	u32 *first = &d[i1];
	u32 *second = &d[i2];
	/* Bits that differ between the two groups, aligned to @mask */
	u32 diff = (*first ^ (*second >> shift)) & mask;

	/* Flipping the differing bits on both sides exchanges the groups */
	*first ^= diff;
	*second ^= diff << shift;
}
| 28 | |
| 29 | |
/*
 * Called on every path that receives an unsupported argument.
 * NOTE(review): presumably left without a definition so that such a call,
 * if it survives optimization, fails at link time -- confirm.
 */
extern void c2p_unsupported(void);

/*
 * Return the bit mask that goes with a group width of @n bits: within each
 * group of 2 * @n bits, the lower @n bits are set.
 *
 * Supported values of @n are 1, 2, 4, 8 and 16.  Any other value calls
 * c2p_unsupported() and yields 0.
 */
static inline u32 get_mask(unsigned int n)
{
	if (n == 1)
		return 0x55555555;
	if (n == 2)
		return 0x33333333;
	if (n == 4)
		return 0x0f0f0f0f;
	if (n == 8)
		return 0x00ff00ff;
	if (n == 16)
		return 0x0000ffff;

	/* Not a supported group width */
	c2p_unsupported();
	return 0;
}
| 54 | |
| 55 | |
/*
 * Transpose operations on 8 32-bit words
 *
 * Runs four basic transpose steps over d[0..7], pairing each word with the
 * one @m positions further on.
 *
 * @d: array of words, updated in place; indices 0 through 7 are accessed
 * @n: bit distance for every step; also selects the mask via get_mask()
 * @m: index distance between paired words; 1, 2 and 4 are supported, any
 *     other value calls c2p_unsupported()
 */

static inline void transp8(u32 d[], unsigned int n, unsigned int m)
{
	u32 mask = get_mask(n);

	if (m == 1) {
		/* Four n x 1 blocks: adjacent words are paired */
		_transp(d, 0, 1, n, mask);
		_transp(d, 2, 3, n, mask);
		_transp(d, 4, 5, n, mask);
		_transp(d, 6, 7, n, mask);
	} else if (m == 2) {
		/* Two n x 2 blocks: words 0..3, then words 4..7 */
		_transp(d, 0, 2, n, mask);
		_transp(d, 1, 3, n, mask);
		_transp(d, 4, 6, n, mask);
		_transp(d, 5, 7, n, mask);
	} else if (m == 4) {
		/* One n x 4 block: lower half paired with upper half */
		_transp(d, 0, 4, n, mask);
		_transp(d, 1, 5, n, mask);
		_transp(d, 2, 6, n, mask);
		_transp(d, 3, 7, n, mask);
	} else {
		c2p_unsupported();
	}
}
| 96 | |
| 97 | |
/*
 * Transpose operations on 4 32-bit words
 *
 * Runs two basic transpose steps over d[0..3], pairing each word with the
 * one @m positions further on.
 *
 * @d: array of words, updated in place; indices 0 through 3 are accessed
 * @n: bit distance for every step; also selects the mask via get_mask()
 * @m: index distance between paired words; 1 and 2 are supported, any
 *     other value calls c2p_unsupported()
 */

static inline void transp4(u32 d[], unsigned int n, unsigned int m)
{
	u32 mask = get_mask(n);

	switch (m) {
	case 1:
		/* Two n x 1 blocks: (0, 1) and (2, 3) */
		_transp(d, 0, 1, n, mask);
		_transp(d, 2, 3, n, mask);
		break;

	case 2:
		/* One n x 2 block: (0, 2) and (1, 3) */
		_transp(d, 0, 2, n, mask);
		_transp(d, 1, 3, n, mask);
		break;

	default:
		c2p_unsupported();
		break;
	}
}
| 123 | |
| 124 | |
/*
 * Transpose operations on 4 32-bit words (reverse order)
 *
 * Pairs the same words as the m == 2 case of transp4(), but with the two
 * indices of every step exchanged: (2, 0) and (3, 1).
 *
 * @d: array of words, updated in place; indices 0 through 3 are accessed
 * @n: bit distance for every step; also selects the mask via get_mask()
 * @m: index distance between paired words; only 2 is supported, any other
 *     value calls c2p_unsupported()
 */

static inline void transp4x(u32 d[], unsigned int n, unsigned int m)
{
	u32 mask = get_mask(n);

	if (m != 2) {
		c2p_unsupported();
		return;
	}

	/* Single n x 2 block */
	_transp(d, 2, 0, n, mask);
	_transp(d, 3, 1, n, mask);
}
| 143 | |
| 144 | |
/*
 * Compose two values, using a bitmask as decision value
 *
 * Every result bit is taken from @a where the matching bit of @mask is set
 * and from @b where it is clear, i.e. the result equals
 * (a & mask) | (b & ~mask).
 */

static inline u32 comp(u32 a, u32 b, u32 mask)
{
	/* Bits in which @a and @b disagree */
	u32 diff = a ^ b;

	/* Flip @b towards @a only where @mask allows it */
	return b ^ (diff & mask);
}