blob: 25c7b483c476a0d6b9910e07f2db37ceab3bfa43 [file] [log] [blame]
/*
* function: kernel_ee
* input: image2d_t as read only
* output: image2d_t as write only
* ee_config: Edge enhancement configuration
*/
typedef struct
{
float ee_gain;
float ee_threshold;
float nr_gain;
} CLEeConfig;
__constant float lv[25] = {0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, -1.0, 0.0, 0.0,
-1.0, -14.0, 32.0, -14.0, -1.0,
0.0, 0.0, -1.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0
};
__constant float lh[25] = {0.0, 0.0, -1.0, 0.0, 0.0,
0.0, 0.0, -14.0, 0.0, 0.0,
0.0, -1.0, 32.0, -1.0, 0.0,
0.0, 0.0, -14.0, 0.0, 0.0,
0.0, 0.0, -1.0, 0.0, 0.0
};
__constant float la[25] = {0.0, 0.0, -2.0, 0.0, 0.0,
0.0, -2.0, -2.0, -2.0, 0.0,
-2.0, -2.0, 24.0, -2.0, -2.0,
0.0, -2.0, -2.0, -2.0, 0.0,
0.0, 0.0, -2.0, 0.0, 0.0
};
__constant float na[25] = { -1.0, -1.0, -1.0, -1.0, -1.0,
-1.0, -1.0, -1.0, -1.0, -1.0,
-1.0, -1.0, 16.0, -1.0, -1.0,
-1.0, -1.0, -1.0, -1.0, -1.0,
-1.0, -1.0, -1.0, -1.0, -1.0
};
__kernel void kernel_ee (__read_only image2d_t input, __write_only image2d_t output, uint vertical_offset, CLEeConfig ee_config)
{
int x = get_global_id (0);
int y = get_global_id (1);
int X = get_global_size(0);
int Y = get_global_size(1);
sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
float4 y_in, y_out, uv_in;
float4 a[5], b[5], c[5], d[5], e[5];
// cpy UV
if(y % 2 == 0) {
uv_in = read_imagef(input, sampler, (int2)(x, y / 2 + vertical_offset));
write_imagef(output, (int2)(x, y / 2 + vertical_offset), uv_in);
}
a[0] = read_imagef(input, sampler, (int2)(x - 2, y - 2));
a[1] = read_imagef(input, sampler, (int2)(x - 1, y - 2));
a[2] = read_imagef(input, sampler, (int2)(x, y - 2));
a[3] = read_imagef(input, sampler, (int2)(x + 1, y - 2));
a[4] = read_imagef(input, sampler, (int2)(x + 2, y - 2));
b[0] = read_imagef(input, sampler, (int2)(x - 2, y - 1));
b[1] = read_imagef(input, sampler, (int2)(x - 1, y - 1));
b[2] = read_imagef(input, sampler, (int2)(x, y - 1));
b[3] = read_imagef(input, sampler, (int2)(x + 1, y - 1));
b[4] = read_imagef(input, sampler, (int2)(x + 2, y - 1));
c[0] = read_imagef(input, sampler, (int2)(x - 2, y));
c[1] = read_imagef(input, sampler, (int2)(x - 1, y));
c[2] = read_imagef(input, sampler, (int2)(x, y));
c[3] = read_imagef(input, sampler, (int2)(x + 1, y));
c[4] = read_imagef(input, sampler, (int2)(x + 2, y));
d[0] = read_imagef(input, sampler, (int2)(x - 2, y + 1));
d[1] = read_imagef(input, sampler, (int2)(x - 1, y + 1));
d[2] = read_imagef(input, sampler, (int2)(x, y + 1));
d[3] = read_imagef(input, sampler, (int2)(x + 1, y + 1));
d[4] = read_imagef(input, sampler, (int2)(x + 2, y + 1));
e[0] = read_imagef(input, sampler, (int2)(x - 2, y + 2));
e[1] = read_imagef(input, sampler, (int2)(x - 1, y + 2));
e[2] = read_imagef(input, sampler, (int2)(x, y + 2));
e[3] = read_imagef(input, sampler, (int2)(x + 1, y + 2));
e[4] = read_imagef(input, sampler, (int2)(x + 2, y + 2));
float eV = (a[0].x * lv[0] + a[1].x * lv[1] + a[2].x * lv[2] + a[3].x * lv[3] + a[4].x * lv[4]
+ b[0].x * lv[5] + b[1].x * lv[6] + b[2].x * lv[7] + b[3].x * lv[8] + b[4].x * lv[9]
+ c[0].x * lv[10] + c[1].x * lv[11] + c[2].x * lv[12] + c[3].x * lv[13] + c[4].x * lv[14]
+ d[0].x * lv[15] + d[1].x * lv[16] + d[2].x * lv[17] + d[3].x * lv[18] + d[4].x * lv[19]
+ e[0].x * lv[20] + e[1].x * lv[21] + e[2].x * lv[22] + e[3].x * lv[23] + e[4].x * lv[24]) * 255.0;
float eH = (a[0].x * lh[0] + a[1].x * lh[1] + a[2].x * lh[2] + a[3].x * lh[3] + a[4].x * lh[4]
+ b[0].x * lh[5] + b[1].x * lh[6] + b[2].x * lh[7] + b[3].x * lh[8] + b[4].x * lh[9]
+ c[0].x * lh[10] + c[1].x * lh[11] + c[2].x * lh[12] + c[3].x * lh[13] + c[4].x * lh[14]
+ d[0].x * lh[15] + d[1].x * lh[16] + d[2].x * lh[17] + d[3].x * lh[18] + d[4].x * lh[19]
+ e[0].x * lh[20] + e[1].x * lh[21] + e[2].x * lh[22] + e[3].x * lh[23] + e[4].x * lh[24]) * 255.0;
float eA = (a[0].x * la[0] + a[1].x * la[1] + a[2].x * la[2] + a[3].x * la[3] + a[4].x * la[4]
+ b[0].x * la[5] + b[1].x * la[6] + b[2].x * la[7] + b[3].x * la[8] + b[4].x * la[9]
+ c[0].x * la[10] + c[1].x * la[11] + c[2].x * la[12] + c[3].x * la[13] + c[4].x * la[14]
+ d[0].x * la[15] + d[1].x * la[16] + d[2].x * la[17] + d[3].x * la[18] + d[4].x * la[19]
+ e[0].x * la[20] + e[1].x * la[21] + e[2].x * la[22] + e[3].x * la[23] + e[4].x * la[24]) * 255.0;
float nA = (a[0].x * na[0] + a[1].x * na[1] + a[2].x * na[2] + a[3].x * na[3] + a[4].x * na[4]
+ b[0].x * na[5] + b[1].x * na[6] + b[2].x * na[7] + b[3].x * na[8] + b[4].x * na[9]
+ c[0].x * na[10] + c[1].x * na[11] + c[2].x * na[12] + c[3].x * na[13] + c[4].x * na[14]
+ d[0].x * na[15] + d[1].x * na[16] + d[2].x * na[17] + d[3].x * na[18] + d[4].x * na[19]
+ e[0].x * na[20] + e[1].x * na[21] + e[2].x * na[22] + e[3].x * na[23] + e[4].x * na[24]) * 255.0;
float nV = eH;
float nH = eV;
float dV = (fabs(2.0 * b[1].x - a[1].x - c[1].x) + fabs(2.0 * b[2].x - a[2].x - c[2].x) + fabs(2.0 * b[3].x - a[3].x - c[3].x) + fabs(2.0 * c[1].x - b[1].x - d[1].x) + fabs(2.0 * c[2].x - b[2].x - d[2].x) + fabs(2.0 * c[3].x - b[3].x - d[3].x) + fabs(2.0 * d[1].x - c[1].x - e[1].x) + fabs(2.0 * d[2].x - c[2].x - e[2].x) + fabs(2.0 * d[3].x - c[3].x - e[3].x)) * 255.0;
float dH = (fabs(2.0 * b[1].x - b[0].x - b[2].x) + fabs(2.0 * b[2].x - b[1].x - b[3].x) + fabs(2.0 * b[3].x - b[2].x - b[4].x) + fabs(2.0 * c[1].x - c[0].x - c[2].x) + fabs(2.0 * c[2].x - c[1].x - c[3].x) + fabs(2.0 * c[3].x - c[2].x - c[4].x) + fabs(2.0 * d[1].x - d[0].x - d[2].x) + fabs(2.0 * d[2].x - d[1].x - d[3].x) + fabs(2.0 * d[3].x - d[2].x - d[4].x)) * 255.0;
float dA = (fabs(2.0 * c[2].x - b[2].x - d[2].x) + fabs(2.0 * c[2].x - c[1].x - c[3].x) + fabs(2.0 * c[2].x - b[1].x - d[3].x) + fabs(2.0 * c[2].x - b[3].x - d[1].x) + fabs(2.0 * c[2].x - a[0].x - e[4].x) + fabs(2.0 * c[2].x - a[4].x - e[0].x) + fabs(2.0 * c[2].x - c[0].x - c[4].x) + fabs(2.0 * c[2].x - a[2].x - e[2].x) + fabs(2.0 * c[2].x - (b[0].x + d[0].x + b[4].x + d[4].x) / 2.0)) * 255.0;
float edge = dH < (dV < dA ? dV : dA) ? eH : (dV < dA ? eV : eA);
float noise = dH < (dV < dA ? dV : dA) ? nH : (dV < dA ? nV : nA);
float dir = dH < (dV < dA ? dA : dV) ? (dV < dA ? dA : dV) : dH;
noise = noise * ee_config.nr_gain / 16.0;
edge = edge * ee_config.ee_gain / 16.0;
y_out.x = dir > ee_config.ee_threshold ? (c[2].x * 255.0 + edge - noise) / 255.0 : c[2].x;
y_out.y = 0.0;
y_out.z = 0.0;
y_out.w = 1.0;
write_imagef(output, (int2)(x, y), y_out);
}