PageRenderTime 49ms CodeModel.GetById 12ms app.highlight 33ms RepoModel.GetById 1ms app.codeStats 0ms

/FFdecsa/parallel_128_16char.h

https://bitbucket.org/a84/gs_public/
C++ Header | 411 lines | 366 code | 26 blank | 19 comment | 1 complexity | ca4644287a9e353add915d00bedd6dfd MD5 | raw file
  1/* FFdecsa -- fast decsa algorithm
  2 *
  3 * Copyright (C) 2003-2004  fatih89r
  4 *
  5 * This program is free software; you can redistribute it and/or modify
  6 * it under the terms of the GNU General Public License as published by
  7 * the Free Software Foundation; either version 2 of the License, or
  8 * (at your option) any later version.
  9 *
 10 * This program is distributed in the hope that it will be useful,
 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 * GNU General Public License for more details.
 14 *
 15 * You should have received a copy of the GNU General Public License
 16 * along with this program; if not, write to the Free Software
 17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 18 */
 19
 20
 21struct group_t{
 22  unsigned char s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,s12,s13,s14,s15,s16;
 23};
 24typedef struct group_t group;
 25
 26#define GROUP_PARALLELISM 128
 27
 28group static inline FF0(){
 29  group res;
 30  res.s1=0x0;
 31  res.s2=0x0;
 32  res.s3=0x0;
 33  res.s4=0x0;
 34  res.s5=0x0;
 35  res.s6=0x0;
 36  res.s7=0x0;
 37  res.s8=0x0;
 38  res.s9=0x0;
 39  res.s10=0x0;
 40  res.s11=0x0;
 41  res.s12=0x0;
 42  res.s13=0x0;
 43  res.s14=0x0;
 44  res.s15=0x0;
 45  res.s16=0x0;
 46  return res;
 47}
 48
 49group static inline FF1(){
 50  group res;
 51  res.s1=0xff;
 52  res.s2=0xff;
 53  res.s3=0xff;
 54  res.s4=0xff;
 55  res.s5=0xff;
 56  res.s6=0xff;
 57  res.s7=0xff;
 58  res.s8=0xff;
 59  res.s9=0xff;
 60  res.s10=0xff;
 61  res.s11=0xff;
 62  res.s12=0xff;
 63  res.s13=0xff;
 64  res.s14=0xff;
 65  res.s15=0xff;
 66  res.s16=0xff;
 67  return res;
 68}
 69
 70group static inline FFAND(group a,group b){
 71  group res;
 72  res.s1=a.s1&b.s1;
 73  res.s2=a.s2&b.s2;
 74  res.s3=a.s3&b.s3;
 75  res.s4=a.s4&b.s4;
 76  res.s5=a.s5&b.s5;
 77  res.s6=a.s6&b.s6;
 78  res.s7=a.s7&b.s7;
 79  res.s8=a.s8&b.s8;
 80  res.s9=a.s9&b.s9;
 81  res.s10=a.s10&b.s10;
 82  res.s11=a.s11&b.s11;
 83  res.s12=a.s12&b.s12;
 84  res.s13=a.s13&b.s13;
 85  res.s14=a.s14&b.s14;
 86  res.s15=a.s15&b.s15;
 87  res.s16=a.s16&b.s16;
 88  return res;
 89}
 90
 91group static inline FFOR(group a,group b){
 92  group res;
 93  res.s1=a.s1|b.s1;
 94  res.s2=a.s2|b.s2;
 95  res.s3=a.s3|b.s3;
 96  res.s4=a.s4|b.s4;
 97  res.s5=a.s5|b.s5;
 98  res.s6=a.s6|b.s6;
 99  res.s7=a.s7|b.s7;
100  res.s8=a.s8|b.s8;
101  res.s9=a.s9|b.s9;
102  res.s10=a.s10|b.s10;
103  res.s11=a.s11|b.s11;
104  res.s12=a.s12|b.s12;
105  res.s13=a.s13|b.s13;
106  res.s14=a.s14|b.s14;
107  res.s15=a.s15|b.s15;
108  res.s16=a.s16|b.s16;
109  return res;
110}
111
112group static inline FFXOR(group a,group b){
113  group res;
114  res.s1=a.s1^b.s1;
115  res.s2=a.s2^b.s2;
116  res.s3=a.s3^b.s3;
117  res.s4=a.s4^b.s4;
118  res.s5=a.s5^b.s5;
119  res.s6=a.s6^b.s6;
120  res.s7=a.s7^b.s7;
121  res.s8=a.s8^b.s8;
122  res.s9=a.s9^b.s9;
123  res.s10=a.s10^b.s10;
124  res.s11=a.s11^b.s11;
125  res.s12=a.s12^b.s12;
126  res.s13=a.s13^b.s13;
127  res.s14=a.s14^b.s14;
128  res.s15=a.s15^b.s15;
129  res.s16=a.s16^b.s16;
130  return res;
131}
132
133group static inline FFNOT(group a){
134  group res;
135  res.s1=~a.s1;
136  res.s2=~a.s2;
137  res.s3=~a.s3;
138  res.s4=~a.s4;
139  res.s5=~a.s5;
140  res.s6=~a.s6;
141  res.s7=~a.s7;
142  res.s8=~a.s8;
143  res.s9=~a.s9;
144  res.s10=~a.s10;
145  res.s11=~a.s11;
146  res.s12=~a.s12;
147  res.s13=~a.s13;
148  res.s14=~a.s14;
149  res.s15=~a.s15;
150  res.s16=~a.s16;
151  return res;
152}
153
154
155/* 64 rows of 128 bits */
156
/* Copy the 8 bytes at data into slot g of tab, i.e. tab[8*g .. 8*g+7].
 *
 * tab  - destination table, at least 8*(g+1) bytes long
 * g    - slot index
 * data - 8 source bytes
 *
 * The copy is done byte by byte rather than through casted int pointers,
 * so it does not depend on the alignment of tab/data, on sizeof(int)
 * being 4, or on accesses that break the aliasing rules. */
static inline void FFTABLEIN(unsigned char *tab, int g, const unsigned char *data)
{
  unsigned char *slot = tab + 8 * g;
  int i;

  for (i = 0; i < 8; i++) {
    slot[i] = data[i];
  }
}
161
/* Copy the 8 bytes of slot g of tab, i.e. tab[8*g .. 8*g+7], into data.
 *
 * data - destination, at least 8 bytes long
 * tab  - source table, at least 8*(g+1) bytes long
 * g    - slot index
 *
 * The copy is done byte by byte rather than through casted int pointers,
 * so it does not depend on the alignment of tab/data, on sizeof(int)
 * being 4, or on accesses that break the aliasing rules. */
static inline void FFTABLEOUT(unsigned char *data, const unsigned char *tab, int g)
{
  const unsigned char *slot = tab + 8 * g;
  int i;

  for (i = 0; i < 8; i++) {
    data[i] = slot[i];
  }
}
166
/* XOR the first n bytes of slot g of tab (starting at tab[8*g]) into the
 * first n bytes of data, in place.  Nothing is touched when n <= 0. */
static inline void FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g)
{
  unsigned char *src = tab + 8 * g;
  int k = 0;

  while (k < n) {
    data[k] ^= src[k];
    k++;
  }
}
173
174
/* A "batch" is 16 independent unsigned bytes, named s1..s16.  The B_FF*
 * helpers in this file apply the same operation to every one of them. */
typedef struct batch_t {
  unsigned char s1, s2, s3, s4, s5, s6, s7, s8;
  unsigned char s9, s10, s11, s12, s13, s14, s15, s16;
} batch;

/* Number of byte members in one batch. */
#define BYTES_PER_BATCH 16
181
182batch static inline B_FFAND(batch a,batch b){
183  batch res;
184  res.s1=a.s1&b.s1;
185  res.s2=a.s2&b.s2;
186  res.s3=a.s3&b.s3;
187  res.s4=a.s4&b.s4;
188  res.s5=a.s5&b.s5;
189  res.s6=a.s6&b.s6;
190  res.s7=a.s7&b.s7;
191  res.s8=a.s8&b.s8;
192  res.s9=a.s9&b.s9;
193  res.s10=a.s10&b.s10;
194  res.s11=a.s11&b.s11;
195  res.s12=a.s12&b.s12;
196  res.s13=a.s13&b.s13;
197  res.s14=a.s14&b.s14;
198  res.s15=a.s15&b.s15;
199  res.s16=a.s16&b.s16;
200  return res;
201}
202
203batch static inline B_FFOR(batch a,batch b){
204  batch res;
205  res.s1=a.s1|b.s1;
206  res.s2=a.s2|b.s2;
207  res.s3=a.s3|b.s3;
208  res.s4=a.s4|b.s4;
209  res.s5=a.s5|b.s5;
210  res.s6=a.s6|b.s6;
211  res.s7=a.s7|b.s7;
212  res.s8=a.s8|b.s8;
213  res.s9=a.s9|b.s9;
214  res.s10=a.s10|b.s10;
215  res.s11=a.s11|b.s11;
216  res.s12=a.s12|b.s12;
217  res.s13=a.s13|b.s13;
218  res.s14=a.s14|b.s14;
219  res.s15=a.s15|b.s15;
220  res.s16=a.s16|b.s16;
221  return res;
222}
223
224batch static inline B_FFXOR(batch a,batch b){
225  batch res;
226  res.s1=a.s1^b.s1;
227  res.s2=a.s2^b.s2;
228  res.s3=a.s3^b.s3;
229  res.s4=a.s4^b.s4;
230  res.s5=a.s5^b.s5;
231  res.s6=a.s6^b.s6;
232  res.s7=a.s7^b.s7;
233  res.s8=a.s8^b.s8;
234  res.s9=a.s9^b.s9;
235  res.s10=a.s10^b.s10;
236  res.s11=a.s11^b.s11;
237  res.s12=a.s12^b.s12;
238  res.s13=a.s13^b.s13;
239  res.s14=a.s14^b.s14;
240  res.s15=a.s15^b.s15;
241  res.s16=a.s16^b.s16;
242  return res;
243}
244
245
246batch static inline B_FFN_ALL_29(){
247  batch res;
248  res.s1=0x29;
249  res.s2=0x29;
250  res.s3=0x29;
251  res.s4=0x29;
252  res.s5=0x29;
253  res.s6=0x29;
254  res.s7=0x29;
255  res.s8=0x29;
256  res.s9=0x29;
257  res.s10=0x29;
258  res.s11=0x29;
259  res.s12=0x29;
260  res.s13=0x29;
261  res.s14=0x29;
262  res.s15=0x29;
263  res.s16=0x29;
264  return res;
265}
266batch static inline B_FFN_ALL_02(){
267  batch res;
268  res.s1=0x02;
269  res.s2=0x02;
270  res.s3=0x02;
271  res.s4=0x02;
272  res.s5=0x02;
273  res.s6=0x02;
274  res.s7=0x02;
275  res.s8=0x02;
276  res.s9=0x02;
277  res.s10=0x02;
278  res.s11=0x02;
279  res.s12=0x02;
280  res.s13=0x02;
281  res.s14=0x02;
282  res.s15=0x02;
283  res.s16=0x02;
284  return res;
285}
286batch static inline B_FFN_ALL_04(){
287  batch res;
288  res.s1=0x04;
289  res.s2=0x04;
290  res.s3=0x04;
291  res.s4=0x04;
292  res.s5=0x04;
293  res.s6=0x04;
294  res.s7=0x04;
295  res.s8=0x04;
296  res.s9=0x04;
297  res.s10=0x04;
298  res.s11=0x04;
299  res.s12=0x04;
300  res.s13=0x04;
301  res.s14=0x04;
302  res.s15=0x04;
303  res.s16=0x04;
304  return res;
305}
306batch static inline B_FFN_ALL_10(){
307  batch res;
308  res.s1=0x10;
309  res.s2=0x10;
310  res.s3=0x10;
311  res.s4=0x10;
312  res.s5=0x10;
313  res.s6=0x10;
314  res.s7=0x10;
315  res.s8=0x10;
316  res.s9=0x10;
317  res.s10=0x10;
318  res.s11=0x10;
319  res.s12=0x10;
320  res.s13=0x10;
321  res.s14=0x10;
322  res.s15=0x10;
323  res.s16=0x10;
324  return res;
325}
326batch static inline B_FFN_ALL_40(){
327  batch res;
328  res.s1=0x40;
329  res.s2=0x40;
330  res.s3=0x40;
331  res.s4=0x40;
332  res.s5=0x40;
333  res.s6=0x40;
334  res.s7=0x40;
335  res.s8=0x40;
336  res.s9=0x40;
337  res.s10=0x40;
338  res.s11=0x40;
339  res.s12=0x40;
340  res.s13=0x40;
341  res.s14=0x40;
342  res.s15=0x40;
343  res.s16=0x40;
344  return res;
345}
346batch static inline B_FFN_ALL_80(){
347  batch res;
348  res.s1=0x80;
349  res.s2=0x80;
350  res.s3=0x80;
351  res.s4=0x80;
352  res.s5=0x80;
353  res.s6=0x80;
354  res.s7=0x80;
355  res.s8=0x80;
356  res.s9=0x80;
357  res.s10=0x80;
358  res.s11=0x80;
359  res.s12=0x80;
360  res.s13=0x80;
361  res.s14=0x80;
362  res.s15=0x80;
363  res.s16=0x80;
364  return res;
365}
366
367batch static inline B_FFSH8L(batch a,int n){
368  batch res;
369  res.s1=a.s1<<n;
370  res.s2=a.s2<<n;
371  res.s3=a.s3<<n;
372  res.s4=a.s4<<n;
373  res.s5=a.s5<<n;
374  res.s6=a.s6<<n;
375  res.s7=a.s7<<n;
376  res.s8=a.s8<<n;
377  res.s9=a.s9<<n;
378  res.s10=a.s10<<n;
379  res.s11=a.s11<<n;
380  res.s12=a.s12<<n;
381  res.s13=a.s13<<n;
382  res.s14=a.s14<<n;
383  res.s15=a.s15<<n;
384  res.s16=a.s16<<n;
385  return res;
386}
387
388batch static inline B_FFSH8R(batch a,int n){
389  batch res;
390  res.s1=a.s1>>n;
391  res.s2=a.s2>>n;
392  res.s3=a.s3>>n;
393  res.s4=a.s4>>n;
394  res.s5=a.s5>>n;
395  res.s6=a.s6>>n;
396  res.s7=a.s7>>n;
397  res.s8=a.s8>>n;
398  res.s9=a.s9>>n;
399  res.s10=a.s10>>n;
400  res.s11=a.s11>>n;
401  res.s12=a.s12>>n;
402  res.s13=a.s13>>n;
403  res.s14=a.s14>>n;
404  res.s15=a.s15>>n;
405  res.s16=a.s16>>n;
406  return res;
407}
408
409
/* Does nothing in this implementation.
 * NOTE(review): presumably a hook that other parallel_* variants use to
 * reset processor state -- confirm against the callers. */
static inline void M_EMPTY(void)
{
  /* intentionally empty */
}