PageRenderTime 50ms CodeModel.GetById 14ms RepoModel.GetById 1ms app.codeStats 0ms

/win32/crc_i386.c

https://github.com/LuaDist/zip
C | 310 lines | 198 code | 18 blank | 94 comment | 9 complexity | 0c9dc9adb5ac1f2803ace84309979cc6 MD5 | raw file
  1. /*
  2. Copyright (c) 1990-2007 Info-ZIP. All rights reserved.
  3. See the accompanying file LICENSE, version 2000-Apr-09 or later
  4. (the contents of which are also included in zip.h) for terms of use.
  5. If, for some reason, all these files are missing, the Info-ZIP license
  6. also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html
  7. */
  8. /* crc_i386.c -- Microsoft 32-bit C/C++ adaptation of crc_i386.asm
  9. * Created by Rodney Brown from crc_i386.asm, modified by Chr. Spieler.
  10. * Last revised: 07-Jan-2007
  11. *
  12. * Original coded (in crc_i386.asm) and put into the public domain
  13. * by Paul Kienitz and Christian Spieler.
  14. *
  15. * Revised 06-Oct-96, Scott Field (sfield@microsoft.com)
  16. * fixed to assemble with masm by not using .model directive which makes
  17. * assumptions about segment alignment. Also,
  18. * avoid using loop, and j[e]cxz where possible. Use mov + inc, rather
  19. * than lodsb, and other misc. changes resulting in the following performance
  20. * increases:
  21. *
  22. *              unrolled loops                NO_UNROLLED_LOOPS
  23. *      *8    >8      <8               *8      >8      <8
  24. *
  25. *      +54%  +42%    +35%             +82%    +52%    +25%
  26. *
  27. * first item in each table is input buffer length, even multiple of 8
  28. * second item in each table is input buffer length, > 8
  29. * third item in each table is input buffer length, < 8
  30. *
  31. * Revised 02-Apr-97, Chr. Spieler, based on Rodney Brown (rdb@cmutual.com.au)
  32. * Incorporated Rodney Brown's 32-bit-reads optimization as found in the
  33. * UNIX AS source crc_i386.S. This new code can be disabled by defining
  34. * the macro symbol NO_32_BIT_LOADS.
  35. *
  36. * Revised 12-Oct-97, Chr. Spieler, based on Rodney Brown (rdb@cmutual.com.au)
  37. * Incorporated Rodney Brown's additional tweaks for 32-bit-optimized CPUs
  38. * (like the Pentium Pro, Pentium II, and probably some Pentium clones).
  39. * This optimization is controlled by the macro symbol __686 and is disabled
  40. * by default. (This default is based on the assumption that most users
  41. * do not yet work on a Pentium Pro or Pentium II machine ...)
  42. *
  43. * Revised 16-Nov-97, Chr. Spieler: Made code compatible with Borland C++
  44. * 32-bit, removed unneeded kludge for potentially unknown movzx mnemonic,
  45. * confirmed correct working with MS VC++ (32-bit).
  46. *
  47. * Revised 22-May-98, Peter Kunath, Chr. Spieler: The 16-Nov-97 revision broke
  48. * MSVC 5.0. Inside preprocessor macros, each instruction is enclosed in its
  49. * own __asm {...} construct. For MSVC, a "#pragma warning" was added to
  50. * shut up the "no return value" warning message.
  51. *
  52. * Revised 13-Dec-98, Chr. Spieler: Modified path to "zip.h" header file.
  53. *
  54. * Revised 16-Jan-2005, Cosmin Truta: Added the ASM_CRC guard, for easier
  55. * switching between ASM vs. non-ASM builds, when handling makefiles.
  56. * Also enabled the 686 build by default, because there are hardly any
  57. * pre-686 CPUs in serious use nowadays. (See the 12-Oct-97 note above.)
  58. *
  59. * Revised 03-Jan-2006, Chr. Spieler
  60. * Enlarged unrolling loops to "do 16 bytes per turn"; optimized access to
  61. * data buffer in loop body (adjust pointer only once in loop body and use
  62. * offsets to access each item); added additional support for the "unfolded
  63. * tables" optimization variant (enabled by IZ_CRCOPTIM_UNFOLDTBL).
  64. *
  65. * Revised 07-Jan-2007, Chr. Spieler
  66. * Recognize additional conditional flag CRC_TABLE_ONLY that prevents
  67. * compilation of the crc32() function.
  68. *
  69. * FLAT memory model assumed.
  70. *
  71. * Loop unrolling can be disabled by defining the macro NO_UNROLLED_LOOPS.
  72. * This results in shorter code at the expense of reduced performance.
  73. *
  74. */
  75. #include "../zip.h"
  76. #include "../crc32.h"
  77. #if defined(ASM_CRC) && !defined(USE_ZLIB) && !defined(CRC_TABLE_ONLY)
  /* Use the 686-tuned instruction selection unless the build either asks
   * for the older variant (PRE_686) or has already defined __686 itself. */
  #if !(defined(PRE_686) || defined(__686))
  #  define __686
  #endif

  /* Supply a plain "const" for ZCONST when it is not already defined. */
  #if !defined(ZCONST)
  #  define ZCONST const
  #endif

  /* Select whether the following inline-assembler code is supported. */

  /* Microsoft C/C++ 7.00 or later, targeting a 386 or better. */
  #if defined(_MSC_VER) && (_MSC_VER >= 700) && defined(_M_IX86) && (_M_IX86 >= 300)
  #  define MSC_INLINE_ASM_32BIT_SUPPORT
     /* Disable warning for no return value, typical of asm functions */
  #  pragma warning( disable : 4035 )
  #endif

  /* Borland C++ (__BORLANDC__ >= 452). */
  #if defined(__BORLANDC__) && (__BORLANDC__ >= 452)
  #  define MSC_INLINE_ASM_32BIT_SUPPORT
  #endif
  95. #ifdef MSC_INLINE_ASM_32BIT_SUPPORT
  96. /* This code is intended for Microsoft C/C++ (32-bit) compatible compilers. */
  /*
   * Byte-wise building blocks for the loop body of the CRC32 cruncher.
   *
   *   Do_CRC              fold the low byte of eax through the crc table
   *   Do_CRC_byte         xor the byte at [esi] into al, step esi by one,
   *                       then Do_CRC
   *   Do_CRC_byteof(ofs)  xor the byte at [esi+ofs] into al (esi is left
   *                       alone), then Do_CRC
   *
   * Register usage:
   *   eax  modified  crc value "c"
   *   esi  modified  pointer to next data byte "buf++" (Do_CRC_byte only)
   *   edi  read      pointer to base of crc_table array
   *   ebx  scratch   index into crc_table array; when __686 is undefined
   *                  only bl is written, so the upper three bytes of ebx
   *                  must already be 0
   *
   * Each instruction is wrapped in its own __asm {...} construct, as
   * described in the 22-May-98 revision note at the top of this file.
   */

  /* The only instruction that differs between the two CPU variants:
   * load the table index (low byte of the crc) into ebx. */
  #ifdef __686
  #  define CRC_TblIdx  __asm { movzx ebx, al }
  #else  /* !__686 */
  #  define CRC_TblIdx  __asm { mov bl, al }
  #endif /* ?__686 */

  #define Do_CRC { \
      CRC_TblIdx; \
      __asm { shr eax, 8 }; \
      __asm { xor eax, [edi+ebx*4] }; }

  #define Do_CRC_byte { \
      __asm { xor al, byte ptr [esi] }; \
      __asm { inc esi }; \
      Do_CRC; }

  #define Do_CRC_byteof(ofs) { \
      __asm { xor al, byte ptr [esi+(ofs)] }; \
      Do_CRC; }
  /*
   * Dword-at-a-time loop-body macros, plus the SavLen location.
   *
   *   SavLen              where crc32() parks the byte count while ecx is
   *                       reused as a loop counter (a register or the "len"
   *                       argument, depending on the variant)
   *   UpdCRC_dword        run the four bytes currently xor-ed into eax
   *                       through the table(s); esi is not changed
   *   UpdCRC_dword_sh(n)  same, and also advance esi by 4*n bytes
   *   Do_CRC_dword        xor the dword at [esi] into eax, update the crc,
   *                       advance esi by 4
   *   Do_CRC_4dword       handle the 16 bytes at [esi]..[esi+15] with a
   *                       single pointer adjustment at the end
   *
   * Registers: eax = crc value, esi = data pointer, edi = table base,
   * ebx = table index scratch.  The IZ_CRCOPTIM_UNFOLDTBL variant also
   * uses edx as an accumulator and reads the table at byte offsets
   * 0, 1024, 2048 and 3072 from edi, i.e. four consecutive ranges of
   * 256 dword entries each (presumably the "unfolded" tables returned by
   * get_crc_table(); verify against the table generator).
   */
  #ifndef NO_32_BIT_LOADS
  #ifdef IZ_CRCOPTIM_UNFOLDTBL
  # define SavLen len /* the edx register is needed elsewhere */
  # define UpdCRC_dword { \
      __asm { movzx ebx,al }; \
      __asm { mov edx,[edi+ebx*4+3072] }; \
      __asm { movzx ebx,ah }; \
      __asm { shr eax,16 }; \
      __asm { xor edx,[edi+ebx*4+2048] }; \
      __asm { movzx ebx,al }; \
      __asm { shr eax,8 }; \
      __asm { xor edx,[edi+ebx*4+1024] }; \
      __asm { mov eax,[edi+eax*4] }; \
      __asm { xor eax,edx }; }
  /* The argument is parenthesized so that an expression such as "n+1"
   * is scaled as a whole (matches the non-unfolded variant below). */
  # define UpdCRC_dword_sh(dwPtrIncr) { \
      __asm { movzx ebx,al }; \
      __asm { mov edx,[edi+ebx*4+3072] }; \
      __asm { movzx ebx,ah }; \
      __asm { xor edx,[edi+ebx*4+2048] }; \
      __asm { shr eax,16 }; \
      __asm { movzx ebx,al }; \
      __asm { add esi, 4*(dwPtrIncr) }; \
      __asm { shr eax,8 }; \
      __asm { xor edx,[edi+ebx*4+1024] }; \
      __asm { mov eax,[edi+eax*4] }; \
      __asm { xor eax,edx }; }
  #else /* !IZ_CRCOPTIM_UNFOLDTBL */
  # define SavLen edx /* the edx register is free for use here */
  # define UpdCRC_dword { \
      Do_CRC; \
      Do_CRC; \
      Do_CRC; \
      Do_CRC; }
  # define UpdCRC_dword_sh(dwPtrIncr) { \
      Do_CRC; \
      Do_CRC; \
      __asm { add esi, 4*(dwPtrIncr) }; \
      Do_CRC; \
      Do_CRC; }
  #endif /* ?IZ_CRCOPTIM_UNFOLDTBL */
  #define Do_CRC_dword { \
      __asm { xor eax, dword ptr [esi] }; \
      UpdCRC_dword_sh(1); }
  #define Do_CRC_4dword { \
      __asm { xor eax, dword ptr [esi] }; \
      UpdCRC_dword; \
      __asm { xor eax, dword ptr [esi+4] }; \
      UpdCRC_dword; \
      __asm { xor eax, dword ptr [esi+8] }; \
      UpdCRC_dword; \
      __asm { xor eax, dword ptr [esi+12] }; \
      UpdCRC_dword_sh(4); }
  #else /* NO_32_BIT_LOADS */
  /* crc32() still saves the length in SavLen for its unrolled 16-byte loop
   * when 32-bit loads are disabled, so SavLen must be defined here as well.
   * Only the byte-wise macros (eax, ebx, esi, edi) are used in this
   * configuration, which leaves edx free. */
  # define SavLen edx
  #endif /* ?NO_32_BIT_LOADS */
  179. /* ========================================================================= */
  180. ulg crc32(crc, buf, len)
  181. ulg crc; /* crc shift register */
  182. ZCONST uch *buf; /* pointer to bytes to pump through */
  183. extent len; /* number of bytes in buf[] */
  184. /* Run a set of bytes through the crc shift register. If buf is a NULL
  185. pointer, then initialize the crc shift register contents instead.
  186. Return the current crc in either case. */
  187. {
  188. __asm {
  189. push edx
  190. push ecx
  191. mov esi,buf ;/* 2nd arg: uch *buf */
  192. sub eax,eax ;/*> if (!buf) */
  193. test esi,esi ;/*> return 0; */
  194. jz fine ;/*> else { */
  195. call get_crc_table
  196. mov edi,eax
  197. mov eax,crc ;/* 1st arg: ulg crc */
  198. #ifndef __686
  199. sub ebx,ebx ;/* ebx=0; => bl usable as a dword */
  200. #endif
  201. mov ecx,len ;/* 3rd arg: extent len */
  202. not eax ;/*> c = ~crc; */
  203. test ecx,ecx
  204. #ifndef NO_UNROLLED_LOOPS
  205. jz bail
  206. # ifndef NO_32_BIT_LOADS
  207. align_loop:
  208. test esi,3 ;/* align buf pointer on next */
  209. jz aligned_now ;/* dword boundary */
  210. }
  211. Do_CRC_byte ;
  212. __asm {
  213. dec ecx
  214. jnz align_loop
  215. aligned_now:
  216. # endif /* !NO_32_BIT_LOADS */
  217. mov SavLen,ecx ;/* save current len for later */
  218. shr ecx,4 ;/* ecx = len / 16 */
  219. jz No_Sixteens
  220. ; align loop head at start of 486 internal cache line !!
  221. align 16
  222. Next_Sixteen:
  223. }
  224. # ifndef NO_32_BIT_LOADS
  225. Do_CRC_4dword ;
  226. # else /* NO_32_BIT_LOADS */
  227. Do_CRC_byteof(0) ;
  228. Do_CRC_byteof(1) ;
  229. Do_CRC_byteof(2) ;
  230. Do_CRC_byteof(3) ;
  231. Do_CRC_byteof(4) ;
  232. Do_CRC_byteof(5) ;
  233. Do_CRC_byteof(6) ;
  234. Do_CRC_byteof(7) ;
  235. Do_CRC_byteof(8) ;
  236. Do_CRC_byteof(9) ;
  237. Do_CRC_byteof(10) ;
  238. Do_CRC_byteof(11) ;
  239. Do_CRC_byteof(12) ;
  240. Do_CRC_byteof(13) ;
  241. Do_CRC_byteof(14) ;
  242. Do_CRC_byteof(15) ;
  243. __asm { add esi,16 };
  244. # endif /* ?NO_32_BIT_LOADS */
  245. __asm {
  246. dec ecx
  247. jnz Next_Sixteen
  248. No_Sixteens:
  249. mov ecx,SavLen
  250. and ecx,00000000FH ;/* ecx = len % 16 */
  251. # ifndef NO_32_BIT_LOADS
  252. shr ecx,2
  253. jz No_Fours
  254. Next_Four:
  255. }
  256. Do_CRC_dword ;
  257. __asm {
  258. dec ecx
  259. jnz Next_Four
  260. No_Fours:
  261. mov ecx,SavLen
  262. and ecx,000000003H ;/* ecx = len % 4 */
  263. # endif /* !NO_32_BIT_LOADS */
  264. #endif /* !NO_UNROLLED_LOOPS */
  265. jz bail ;/*> if (len) */
  266. ; align loop head at start of 486 internal cache line !!
  267. align 16
  268. loupe: ;/*> do { */
  269. }
  270. Do_CRC_byte ;/* c = CRC32(c,*buf++,crctab);*/
  271. __asm {
  272. dec ecx ;/*> } while (--len); */
  273. jnz loupe
  274. bail: ;/*> } */
  275. not eax ;/*> return ~c; */
  276. fine:
  277. pop ecx
  278. pop edx
  279. }
  280. #ifdef NEED_RETURN
  281. return _EAX;
  282. #endif
  283. }
  284. #endif /* MSC_INLINE_ASM_32BIT_SUPPORT */
  /* Same compiler test as the one that disabled warning 4035 earlier in
   * this file: Microsoft C/C++ 7.00 or later, targeting a 386 or better. */
  #if defined(_MSC_VER) && (_MSC_VER >= 700) && defined(_M_IX86) && (_M_IX86 >= 300)
     /* Reenable missing return value warning */
  #  pragma warning( default : 4035 )
  #endif
  291. #endif /* ASM_CRC && !USE_ZLIB && !CRC_TABLE_ONLY */