PageRenderTime 40ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 0ms

/PRILOHY/PRILOHY_DO_REPOZITARE/WARC-TOOLS/warc-tools-read-only/lib/private/wbloc.c

http://github.com/MartinProkop/MojeWebarchivBakalarskaPrace
C | 321 lines | 149 code | 76 blank | 96 comment | 14 complexity | 70b6c3b435df7aa5d7b14fbb577b7369 MD5 | raw file
Possible License(s): Apache-2.0
  1. /* ------------------------------------------------------------------- */
  2. /* Copyright (c) 2007-2008 Hanzo Archives Limited. */
  3. /* */
  4. /* Licensed under the Apache License, Version 2.0 (the "License"); */
  5. /* you may not use this file except in compliance with the License. */
  6. /* You may obtain a copy of the License at */
  7. /* */
  8. /* http://www.apache.org/licenses/LICENSE-2.0 */
  9. /* */
  10. /* Unless required by applicable law or agreed to in writing, software */
  11. /* distributed under the License is distributed on an "AS IS" BASIS, */
  12. /* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */
  13. /* implied. */
  14. /* See the License for the specific language governing permissions and */
  15. /* limitations under the License. */
  16. /* */
  17. /* You may find more information about Hanzo Archives at */
  18. /* */
  19. /* http://www.hanzoarchives.com/ */
  20. /* */
  21. /* You may find more information about the WARC Tools project at */
  22. /* */
  23. /* http://code.google.com/p/warc-tools/ */
  24. /* ------------------------------------------------------------------- */
  25. /*
  26. * Portability header file
  27. */
  28. #include <wport.h>
  29. /*
  30. * WARC default headers
  31. */
  32. #include <wclass.h> /* bless, destroy, cassert, struct Class */
  33. #include <wsign.h> /* CASSERT macro */
  34. #include <wbloc.h> /* for class's prototypes */
  35. #include <wbloc.x> /* for private class's prototypes */
  36. #include <wmem.h> /* wmalloc, wfree */
  37. #include <wmisc.h> /* unless, ... */
  38. #include <wcsafe.h> /* w_strncpy, ... */
  39. #include <wrecord.h>
  40. #include <wmktmp.h>
  41. #define SIGN 22
  42. /**
  43. * WARC Record Data Bloc Class
  44. */
  45. struct WBloc
  46. {
  47. const void * class;
  48. /*@{*/
  49. warc_u32_t allocation_unit; /**< The amount of Byte to allocate */
  50. warc_u8_t * buffer ; /**< The buffer whcich will contain the data */
  51. warc_bool_t eob; /**< end of bloc */
  52. void * wfile; /**< The Warc File wher the bloc will be got */
  53. void * wrecord; /**< The WRecord class instance containg the bloc */
  54. void * wtfile; /**< The WTempFile object that will contain the data bloc */
  55. warc_u8_t http_code[4]; /**< Will contain the http response code */
  56. warc_u32_t last_size ; /**< The size of the last read chunk */
  57. /*@}*/
  58. };
  /*
   * Shorthand accessors for the members of the current object. Each one
   * expects a `struct WBloc * self' to be in scope at the point of use.
   */
  #define ALLOC    (self -> allocation_unit)
  #define BUFF     (self -> buffer)
  #define WFILE    (self -> wfile)
  #define RECORD   (self -> wrecord)
  #define WTFILE   (self -> wtfile)
  #define CODE     (self -> http_code)
  #define EOB      (self -> eob)
  #define LASTSIZE (self -> last_size)

  /*
   * Default chunk size in bytes (64 KiB), used when the constructor is
   * given an allocation unit of 0. The expansion is parenthesized so it
   * behaves as a single operand in any expression (e.g. x / DEFAULT_UNIT).
   */
  #define DEFAULT_UNIT (64 * 1024)
  68. /**
  69. * @param _self: a WBloc object instance
  70. * @param bytes: will contain the number of got bytes
  71. *
  72. * @return A chunk from the content bloc as an warc_u8_t array
  73. *
  74. * Returns a chunk from the data Bloc of a WRecord
  75. * without the encapsulated HTTP response if existing
  76. */
  77. WPUBLIC warc_u8_t * WBloc_next (void * _self)
  78. {
  79. struct WBloc * self = _self;
  80. FILE * tfile = NIL;
  81. warc_u32_t size = 0;
  82. /* Preconditions */
  83. CASSERT (self);
  84. tfile = WTempFile_handle (WTFILE);
  85. if(EOB || feof (tfile))
  86. {
  87. w_fseek_start (tfile);
  88. EOB = WARC_FALSE;
  89. LASTSIZE = 0;
  90. return (NIL);
  91. }
  92. size = w_fread (BUFF, 1, ALLOC, tfile);
  93. BUFF [size] = '\0';
  94. LASTSIZE = size;
  95. if(size < ALLOC)
  96. {
  97. EOB = WARC_TRUE;
  98. return (BUFF);
  99. }
  100. return (BUFF);
  101. }
  102. /**
  103. * @param _self: a WBloc object instance
  104. * @param tmpfile: temporary FILE * handle
  105. * @return WARC_FALSE if the copy succeeds. Otherwise WARC_TRUE
  106. *
  107. * Copy data from WBloc internal temporary file to an external
  108. * FILE * handle (to use only with Ruby - SWIG interface).
  109. */
  110. WPUBLIC warc_bool_t WBloc_copyPayloadToTemporary (void * _self, int tmpfile)
  111. {
  112. struct WBloc * self = _self;
  113. /* Preconditions */
  114. CASSERT (self);
  115. assert (tmpfile);
  116. while (WBloc_next (self))
  117. {
  118. write(tmpfile, BUFF, LASTSIZE);
  119. if (WARC_TRUE == EOB)
  120. break;
  121. }
  122. /* rewind the internal filehandle for future usage */
  123. w_fseek_start (WTempFile_handle (WTFILE));
  124. /* something wrong happens */
  125. if (WARC_FALSE == EOB)
  126. return (WARC_TRUE);
  127. /* everything went fine */
  128. return (EOB = WARC_FALSE);
  129. }
  130. /**
  131. * @param _self: A WBloc object instance
  132. *
  133. * @return the size of the last read chunk size
  134. *
  135. * Last Read chunk size recovering function
  136. */
  137. WPUBLIC warc_u32_t WBloc_getLastChunkSize (const void * const _self)
  138. {
  139. const struct WBloc * const self = _self;
  140. /* Precondtionx */
  141. CASSERT(self);
  142. return (LASTSIZE);
  143. }
  144. /**
  145. * @param _self: a WBloc object
  146. *
  147. * @return the http response code as a char[4] array
  148. *
  149. * WARC Record content HTTP response code returning function
  150. */
  151. WPUBLIC const warc_u8_t * WBloc_getHttpCode (const void * const _self)
  152. {
  153. const struct WBloc * const self = _self;
  154. /* Preconditions */
  155. CASSERT (self);
  156. /* preconditions */
  157. unless (CODE)
  158. return (NIL);
  159. if (CODE[0] == '\0')
  160. return (NIL);
  161. return (CODE);
  162. }
  163. /**
  164. * WBloc_constructor - create a new WBloc object instance
  165. *
  166. * @param _self WBloc class object
  167. * @param app: constructor list parameters
  168. *
  169. * @return a valid WBloc object or NIL
  170. *
  171. * @brief WARC Bloc constructor
  172. */
  173. WPRIVATE void * WBloc_constructor (void * _self, va_list * app)
  174. {
  175. struct WBloc * const self = _self;
  176. void * file = va_arg (* app, void *);
  177. void * record = va_arg (* app, void *);
  178. warc_bool_t httpheaders = va_arg (* app, const warc_bool_t);
  179. const warc_u32_t alloc = va_arg (* app, const warc_u32_t);
  180. warc_u32_t allocated = DEFAULT_UNIT;
  181. unless (record)
  182. {
  183. destroy(self);
  184. return (NIL);
  185. }
  186. /* if alloc = 0, use allocated */
  187. if (alloc)
  188. allocated = alloc;
  189. ALLOC = allocated;
  190. BUFF = wmalloc (ALLOC + 1);
  191. unless (BUFF)
  192. {
  193. destroy (self);
  194. return (NIL);
  195. }
  196. WTFILE = NIL;
  197. RECORD = record;
  198. WFILE = file;
  199. CODE [0] = '\0';
  200. EOB = WARC_FALSE;
  201. unless (RECORD && WFILE)
  202. {
  203. wfree (BUFF), BUFF = NIL;
  204. destroy (self);
  205. return (NIL);
  206. }
  207. WTFILE = WRecord_getBloc (RECORD, WFILE, httpheaders, CODE);
  208. unless (WTFILE)
  209. {
  210. destroy (self);
  211. return (NIL);
  212. }
  213. w_fseek_start (WTempFile_handle (WTFILE));
  214. LASTSIZE = 0;
  215. return (self);
  216. }
  217. /**
  218. * WBloc_destructor - delete an existing WBloc object
  219. *
  220. * @param _self WBloc object instance
  221. *
  222. * WARC Bloc destructor
  223. */
  224. WPRIVATE void * WBloc_destructor (void * _self)
  225. {
  226. struct WBloc * self = _self;
  227. /* preconditions */
  228. CASSERT (self);
  229. if (BUFF)
  230. wfree (BUFF), BUFF = NIL;
  231. if (WTFILE)
  232. destroy (WTFILE), WTFILE = NIL;
  233. LASTSIZE = 0;
  234. return (self);
  235. }
  236. /**
  237. * WARC WBloc class
  238. */
  239. static const struct Class _WBloc =
  240. {
  241. sizeof (struct WBloc),
  242. SIGN,
  243. WBloc_constructor, WBloc_destructor
  244. };
  245. const void * WBloc = & _WBloc;