PageRenderTime 64ms CodeModel.GetById 16ms app.highlight 34ms RepoModel.GetById 5ms app.codeStats 1ms

/src/opencl/sha1_kernel.cl

https://bitbucket.org/lcirvin/cse465-project
OpenCL | 232 lines | 206 code | 26 blank | 0 comment | 0 complexity | da6c2833aa218ae5704ac038bc0596c4 MD5 | raw file
  1/* 
  2   This code was largely inspired by 
  3   pyrit opencl kernel sha1 routines, royger's sha1 sample,
  4   and md5_opencl_kernel.cl inside jtr.
  5   Copyright 2011 by Samuele Giovanni Tonon 
  6   samu at linuxasylum dot net
  7   This program comes with ABSOLUTELY NO WARRANTY; express or
  8   implied .
  9   This is free software, and you are welcome to redistribute it
 10   under certain conditions; as expressed here 
 11   http://www.gnu.org/licenses/gpl-2.0.html
 12*/
 13
 14#ifdef cl_khr_byte_addressable_store
 15#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : disable
 16#endif
 17
 18#ifdef cl_nv_pragma_unroll
 19#define NVIDIA
 20#pragma OPENCL EXTENSION cl_nv_pragma_unroll : enable
 21#endif
 22
 23#ifdef NVIDIA
 24inline uint SWAP32(uint x)
 25{
 26	x = rotate(x, 16U);
 27	return ((x & 0x00FF00FF) << 8) + ((x >> 8) & 0x00FF00FF);
 28}
 29#else
 30#define SWAP32(a)	(as_uint(as_uchar4(a).wzyx))
 31#endif
 32
 33#define K0  0x5A827999
 34#define K1  0x6ED9EBA1
 35#define K2  0x8F1BBCDC
 36#define K3  0xCA62C1D6
 37
 38#define H1 0x67452301
 39#define H2 0xEFCDAB89
 40#define H3 0x98BADCFE
 41#define H4 0x10325476
 42#define H5 0xC3D2E1F0
 43
 44#ifndef uint32_t
 45#define uint32_t unsigned int
 46#endif
 47
 48
 49__kernel void sha1_crypt_kernel(__global uint *data_info,__global char *plain_key,  __global uint *digest){
 50    int t, gid, msg_pad;
 51    int stop, mmod;
 52    uint i, ulen;
 53    uint W[16], temp, A,B,C,D,E;
 54    uint num_keys = data_info[1];
 55
 56    gid = get_global_id(0);
 57    msg_pad = gid * data_info[0];
 58
 59    A = H1;
 60    B = H2;
 61    C = H3;
 62    D = H4;
 63    E = H5;
 64
 65#pragma unroll
 66    for (t = 1; t < 15; t++){
 67	W[t] = 0x00000000;
 68    }
 69    for(i = 0; i < data_info[0] && ((uchar) plain_key[msg_pad + i]) != 0x0 ; i++){
 70    }
 71
 72    stop = i / 4 ;
 73    for (t = 0 ; t < stop ; t++){
 74        W[t] = ((uchar)  plain_key[msg_pad + t * 4]) << 24;
 75        W[t] |= ((uchar) plain_key[msg_pad + t * 4 + 1]) << 16;
 76        W[t] |= ((uchar) plain_key[msg_pad + t * 4 + 2]) << 8;
 77        W[t] |= (uchar)  plain_key[msg_pad + t * 4 + 3];
 78    }
 79    mmod = i % 4;
 80    if ( mmod == 3){
 81        W[t] = ((uchar)  plain_key[msg_pad + t * 4]) << 24;
 82        W[t] |= ((uchar) plain_key[msg_pad + t * 4 + 1]) << 16;
 83        W[t] |= ((uchar) plain_key[msg_pad + t * 4 + 2]) << 8;
 84        W[t] |=  ((uchar) 0x80) ;
 85    } else if (mmod == 2) {
 86        W[t] = ((uchar)  plain_key[msg_pad + t * 4]) << 24;
 87        W[t] |= ((uchar) plain_key[msg_pad + t * 4 + 1]) << 16;
 88        W[t] |=  0x8000 ;
 89    } else if (mmod == 1) {
 90        W[t] = ((uchar)  plain_key[msg_pad + t * 4]) << 24;
 91        W[t] |=  0x800000 ;
 92    } else /*if (mmod == 0)*/ {
 93        W[t] =  0x80000000 ;
 94    }
 95    ulen = (i * 8) & 0xFFFFFFFF;
 96    W[15] =  ulen ;
 97
 98#undef R
 99#define R(t)                                              \
100(                                                         \
101    temp = W[(t -  3) & 0x0F] ^ W[(t - 8) & 0x0F] ^       \
102           W[(t - 14) & 0x0F] ^ W[ t      & 0x0F],        \
103    ( W[t & 0x0F] = rotate((int)temp,1) )                 \
104)
105
106#undef P
107#define P(a,b,c,d,e,x)                                    \
108{                                                         \
109    e += rotate((int)a,5) + F(b,c,d) + K + x; b = rotate((int)b,30);\
110}
111
112#ifdef NVIDIA
113#define F(x,y,z)	(z ^ (x & (y ^ z)))
114#else
115#define F(x,y,z)	bitselect(z, y, x)
116#endif
117#define K 0x5A827999
118
119  P( A, B, C, D, E, W[0]  );
120  P( E, A, B, C, D, W[1]  );
121  P( D, E, A, B, C, W[2]  );
122  P( C, D, E, A, B, W[3]  );
123  P( B, C, D, E, A, W[4]  );
124  P( A, B, C, D, E, W[5]  );
125  P( E, A, B, C, D, W[6]  );
126  P( D, E, A, B, C, W[7]  );
127  P( C, D, E, A, B, W[8]  );
128  P( B, C, D, E, A, W[9]  );
129  P( A, B, C, D, E, W[10] );
130  P( E, A, B, C, D, W[11] );
131  P( D, E, A, B, C, W[12] );
132  P( C, D, E, A, B, W[13] );
133  P( B, C, D, E, A, W[14] );
134  P( A, B, C, D, E, W[15] );
135  P( E, A, B, C, D, R(16) );
136  P( D, E, A, B, C, R(17) );
137  P( C, D, E, A, B, R(18) );
138  P( B, C, D, E, A, R(19) );
139
140#undef K
141#undef F
142
143#define F(x,y,z) (x ^ y ^ z)
144#define K 0x6ED9EBA1
145
146  P( A, B, C, D, E, R(20) );
147  P( E, A, B, C, D, R(21) );
148  P( D, E, A, B, C, R(22) );
149  P( C, D, E, A, B, R(23) );
150  P( B, C, D, E, A, R(24) );
151  P( A, B, C, D, E, R(25) );
152  P( E, A, B, C, D, R(26) );
153  P( D, E, A, B, C, R(27) );
154  P( C, D, E, A, B, R(28) );
155  P( B, C, D, E, A, R(29) );
156  P( A, B, C, D, E, R(30) );
157  P( E, A, B, C, D, R(31) );
158  P( D, E, A, B, C, R(32) );
159  P( C, D, E, A, B, R(33) );
160  P( B, C, D, E, A, R(34) );
161  P( A, B, C, D, E, R(35) );
162  P( E, A, B, C, D, R(36) );
163  P( D, E, A, B, C, R(37) );
164  P( C, D, E, A, B, R(38) );
165  P( B, C, D, E, A, R(39) );
166
167#undef K
168#undef F
169
170#ifdef NVIDIA
171#define F(x,y,z)	((x & y) | (z & (x | y)))
172#else
173#define F(x,y,z)	(bitselect(x, y, z) ^ bitselect(x, 0U, y))
174#endif
175#define K 0x8F1BBCDC
176
177  P( A, B, C, D, E, R(40) );
178  P( E, A, B, C, D, R(41) );
179  P( D, E, A, B, C, R(42) );
180  P( C, D, E, A, B, R(43) );
181  P( B, C, D, E, A, R(44) );
182  P( A, B, C, D, E, R(45) );
183  P( E, A, B, C, D, R(46) );
184  P( D, E, A, B, C, R(47) );
185  P( C, D, E, A, B, R(48) );
186  P( B, C, D, E, A, R(49) );
187  P( A, B, C, D, E, R(50) );
188  P( E, A, B, C, D, R(51) );
189  P( D, E, A, B, C, R(52) );
190  P( C, D, E, A, B, R(53) );
191  P( B, C, D, E, A, R(54) );
192  P( A, B, C, D, E, R(55) );
193  P( E, A, B, C, D, R(56) );
194  P( D, E, A, B, C, R(57) );
195  P( C, D, E, A, B, R(58) );
196  P( B, C, D, E, A, R(59) );
197
198#undef K
199#undef F
200
201#define F(x,y,z) (x ^ y ^ z)
202#define K 0xCA62C1D6
203
204  P( A, B, C, D, E, R(60) );
205  P( E, A, B, C, D, R(61) );
206  P( D, E, A, B, C, R(62) );
207  P( C, D, E, A, B, R(63) );
208  P( B, C, D, E, A, R(64) );
209  P( A, B, C, D, E, R(65) );
210  P( E, A, B, C, D, R(66) );
211  P( D, E, A, B, C, R(67) );
212  P( C, D, E, A, B, R(68) );
213  P( B, C, D, E, A, R(69) );
214  P( A, B, C, D, E, R(70) );
215  P( E, A, B, C, D, R(71) );
216  P( D, E, A, B, C, R(72) );
217  P( C, D, E, A, B, R(73) );
218  P( B, C, D, E, A, R(74) );
219  P( A, B, C, D, E, R(75) );
220  P( E, A, B, C, D, R(76) );
221  P( D, E, A, B, C, R(77) );
222  P( C, D, E, A, B, R(78) );
223  P( B, C, D, E, A, R(79) );
224
225#undef K
226#undef F
227  digest[gid] = SWAP32(A + H1);
228  digest[gid+1*num_keys] = SWAP32(B + H2);
229  digest[gid+2*num_keys] = SWAP32(C + H3);
230  digest[gid+3*num_keys] = SWAP32(D + H4);
231  digest[gid+4*num_keys] = SWAP32(E + H5);
232}