/Test_Misc/test1/xxx-2.cl
http://oclbenchmarks.codeplex.com · OpenCL · 48 lines · 44 code · 4 blank · 0 comment · 0 complexity · 9697e51f5d5196e70721da04baae479c MD5 · raw file
/*
 * Element type of the vectors. Defaults to float; the guard lets a build
 * option (e.g. -DFPTYPE=...) select another type without editing this file.
 */
#ifndef FPTYPE
#define FPTYPE float
#endif

/*
 * innerProductKer_1 - per-work-group partial inner product of vec_1 and vec_2.
 *
 * Each work-item accumulates vec_1[n] * vec_2[n] for
 * n = group_id * local_size + local_id, advancing by the global size while
 * n < realSize. The per-item sums are then reduced through `sum`, and
 * work-item 0 of every group stores the group total in
 * reduction_buffer[group_id]. Adding up the per-group totals is left to the
 * caller.
 *
 * vec_1, vec_2      input vectors; elements with index < realSize are read.
 * reduction_buffer  output. Slot [group_id] receives that group's partial
 *                   sum; every slot [get_global_id(0)] that is not below the
 *                   number of groups is set to 0, so the buffer must be large
 *                   enough to be indexed by every global id.
 * realSize          number of elements to process in each vector.
 * sum               local scratch, indexed by local id; needs at least
 *                   get_local_size(0) elements.
 */
__kernel void
innerProductKer_1(__global const FPTYPE * restrict vec_1,
                  __global const FPTYPE * restrict vec_2,
                  __global FPTYPE * restrict reduction_buffer,
                  const unsigned long realSize,
                  __local volatile FPTYPE * sum)
{
    const unsigned long t      = get_local_id(0);
    const unsigned long wsz    = get_local_size(0);
    const unsigned long gid    = get_group_id(0);
    const unsigned long gsz    = get_global_size(0);
    const unsigned long ng     = get_num_groups(0);
    const unsigned long initid = get_global_id(0);

    /*
     * Clear only the slots that no group writes a result to. Slots below
     * the group count are written by each group's work-item 0 at the end of
     * the kernel; zeroing them here from a different work-group would race
     * with that store, because work-groups cannot synchronise with each
     * other.
     */
    if (initid >= ng) {
        reduction_buffer[initid] = 0;
    }

    /* Per-work-item partial sum over a strided slice of the input. */
    FPTYPE sumTemp = 0;
    for (unsigned long n = gid * wsz + t; n < realSize; n += gsz) {
        sumTemp += vec_1[n] * vec_2[n];
    }
    sum[t] = sumTemp;

    /*
     * Tree reduction in local memory. `active` is the number of live
     * entries in sum[]; each pass folds the upper part onto the lower part.
     * The stride is rounded up so that an odd `active` keeps its middle
     * element instead of dropping it, which makes the reduction correct for
     * work-group sizes that are not a power of two. The loop bounds depend
     * only on wsz, so every work-item reaches every barrier.
     */
    unsigned long active = wsz;
    while (active > 1) {
        const unsigned long stride = (active + 1) / 2;

        /* Make the previous pass's (or the initial) stores visible. */
        barrier(CLK_LOCAL_MEM_FENCE);

        if (t + stride < active) {
            sum[t] += sum[t + stride];
        }
        active = stride;
    }

    /*
     * Work-item 0 performed the last accumulation into sum[0] itself, so no
     * further barrier is needed before it publishes the group total.
     */
    if (t == 0) {
        reduction_buffer[gid] = sum[0];
    }
}