-
Notifications
You must be signed in to change notification settings - Fork 1
/
fft.cl
103 lines (76 loc) · 3.15 KB
/
fft.cl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
/* Pi, used by spinFact to build the angle 2*PI*i/n. */
#define PI 3.14159265358979323846
/* Pi / 2; spinFact adds it to the angle so that cos() of the sum gives the
 * negated sine of the original angle. */
#define PI_2 1.57079632679489661923
/*
 * Fill the twiddle-factor table.
 *
 * Work-item i (dimension 0) stores w[i] = (cos(t), cos(t + PI/2)) with
 * t = 2*PI*i/n.  Since cos(t + PI/2) == -sin(t), each entry is the pair
 * (cos t, -sin t).
 *
 * w: output table, one float2 written per work-item.
 * n: divisor of the angle (presumably the transform length - confirm
 *    against the host code).
 */
__kernel void spinFact(__global float2 *w, int n) {
    const unsigned int idx = get_global_id(0);

    /* x lane: the angle itself; y lane: the same angle plus a quarter turn. */
    const float2 phase = (float2)(2 * idx * PI / (float)n,
                                  (2 * idx * PI / (float)n) + PI_2);

    w[idx] = cos(phase);
}
/*
 * Copy each row of src into dst in bit-reversed column order.
 *
 * Work-item (dimension 0 = column, dimension 1 = row) reads
 * src[row * n + column] and writes it to dst[row * n + rev], where rev is
 * the low m bits of the column index in reversed bit order.
 *
 * dst: destination buffer, addressed as rows of n elements.
 * src: source buffer, same layout.
 * m:   number of index bits to reverse.
 * n:   row stride in elements.
 */
__kernel void bitReverse(__global float2 *dst, __global float2 *src, int m,
                         int n) {
    const unsigned int col = get_global_id(0);
    const unsigned int row = get_global_id(1);

    /* Reverse all 32 bits by swapping neighbouring groups of 1, 2, 4, 8
     * and finally 16 bits. */
    unsigned int rev = col;
    rev = ((rev >> 1) & 0x55555555) | ((rev & 0x55555555) << 1);
    rev = ((rev >> 2) & 0x33333333) | ((rev & 0x33333333) << 2);
    rev = ((rev >> 4) & 0x0F0F0F0F) | ((rev & 0x0F0F0F0F) << 4);
    rev = ((rev >> 8) & 0x00FF00FF) | ((rev & 0x00FF00FF) << 8);
    rev = (rev >> 16) | (rev << 16);

    /* Keep only the m reversed bits, which now sit at the top of the word. */
    rev >>= (32 - m);

    const unsigned int rowBase = row * n;
    dst[rowBase + rev] = src[rowBase + col];
}
/*
 * Divide one element of x by n, component-wise.
 *
 * Work-item (dimension 0 = column, dimension 1 = row) updates
 * x[row * n + column] in place.
 *
 * x: buffer addressed as rows of n elements.
 * n: row stride and also the divisor applied to both components.
 */
__kernel void norm(__global float2 *x, int n) {
    const unsigned int col = get_global_id(0);
    const unsigned int row = get_global_id(1);
    const unsigned int idx = row * n + col;

    const float2 divisor = (float2)((float)n, (float)n);
    x[idx] = x[idx] / divisor;
}
/*
 * Perform one in-place butterfly for stage `iter`.
 *
 * Work-item (gid in dimension 0, nid in dimension 1) picks the element pair
 * (a, b) inside row nid and replaces it with
 *     x[a] = x[a] + x[b] * t
 *     x[b] = x[a] - x[b] * t
 * where the products are complex multiplications (x = real, y = imaginary)
 * and t is the table entry w[...] with `flag` XORed into the bit pattern of
 * its y component.
 *
 * x:    data buffer, addressed as rows of n elements; updated in place.
 * w:    twiddle table, indexed by (n >> iter) * position-within-group.
 * m:    not referenced by this kernel.
 * n:    row stride in elements.
 * iter: stage number; the pair distance is 1 << (iter - 1).
 * flag: bit mask XORed into the twiddle's y component; 0x80000000 would flip
 *       its sign (presumably selecting the inverse transform - confirm
 *       against the host code).
 */
__kernel void butterfly(__global float2 *x, __global float2 *w, int m, int n,
                        int iter, uint flag) {
    const unsigned int gid = get_global_id(0);
    const unsigned int nid = get_global_id(1);

    const int pairDist = 1 << (iter - 1); /* distance between a and b */
    const int groupLen = 1 << iter;       /* elements covered by one group */
    const int twStride = n >> iter;       /* step through the twiddle table */

    const int groupStart = (gid >> (iter - 1)) * (groupLen);
    const int lane = gid & (pairDist - 1);

    const int idxA = nid * n + groupStart + lane;
    const int idxB = idxA + pairDist;
    const int twIdx = twStride * lane;

    const float2 valA = x[idxA];
    const float2 valB = x[idxB];

    /* Twiddle with `flag` applied to the bits of its y component. */
    const float2 tw = as_float2(as_uint2(w[twIdx]) ^ (uint2)(0x0, flag));
    /* (-tw.y, tw.x): swapped twiddle with the sign bit of the first lane flipped. */
    const float2 twSwapNegX =
        as_float2(as_uint2(tw.yx) ^ (uint2)(0x80000000, 0x0));
    /* (tw.y, -tw.x): swapped twiddle with the sign bit of the second lane flipped. */
    const float2 twSwapNegY =
        as_float2(as_uint2(tw.yx) ^ (uint2)(0x0, 0x80000000));

    /* valB.xx * tw + valB.yy * twSwapNegX is the complex product valB * tw;
     * the second line subtracts the same product. */
    x[idxA] = valA + valB.xx * tw + valB.yy * twSwapNegX;
    x[idxB] = valA - valB.xx * tw + valB.yy * twSwapNegY;
}
/*
 * Transpose an n-by-n matrix of float2 elements.
 *
 * Work-item (dimension 0 = column, dimension 1 = row) copies
 * src[row * n + column] to dst[column * n + row].
 *
 * dst: destination buffer.
 * src: source buffer.
 * n:   stride used for both buffers.
 */
__kernel void transpose(__global float2 *dst, __global float2 *src, int n) {
    const unsigned int col = get_global_id(0);
    const unsigned int row = get_global_id(1);

    dst[col * n + row] = src[row * n + col];
}
/*
 * Zero every element that lies within `radius` of the centre once the
 * coordinates have been shifted by n/2 with wrap-around; all other elements
 * are left unchanged.
 *
 * Work-item (dimension 0 = column, dimension 1 = row) handles
 * image[row * n + column].
 *
 * image:  buffer addressed as rows of n elements; updated in place.
 * n:      row stride; the wrap-around uses `& (n - 1)`, which equals
 *         modulo n only when n is a power of two.
 * radius: elements whose squared distance is below radius * radius are
 *         cleared.
 */
__kernel void highPassFilter(__global float2 *image, int n, int radius) {
    const unsigned int col = get_global_id(0);
    const unsigned int row = get_global_id(1);

    const int2 centre = (int2)(n >> 1, n >> 1);
    const int2 wrap = (int2)(n - 1, n - 1);

    /* Shift the coordinate by n/2 in each axis, wrapping at n. */
    const int2 shifted = ((int2)(col, row) + centre) & wrap;
    const int2 delta = centre - shifted;
    const int distSq = delta.x * delta.x + delta.y * delta.y;

    /* All-zero bits clear the element; all-one bits keep it untouched. */
    const int keepBits = (distSq < radius * radius) ? 0 : -1;
    const int2 window = (int2)(keepBits, keepBits);

    const unsigned int idx = row * n + col;
    image[idx] = as_float2(as_int2(image[idx]) & window);
}