PageRenderTime 49ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/testdisk-6.13/src/file_pdf.c

#
C | 338 lines | 299 code | 11 blank | 28 comment | 63 complexity | 9d159816e1df9063a78da3c70152eb37 MD5 | raw file
Possible License(s): GPL-2.0
  1. /*
  2. File: file_pdf.c
  3. Copyright (C) 1998-2011 Christophe GRENIER <grenier@cgsecurity.org>
  4. This software is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License along
  13. with this program; if not, write to the Free Software Foundation, Inc., 51
  14. Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  15. */
  16. #ifdef HAVE_CONFIG_H
  17. #include <config.h>
  18. #endif
  19. #ifdef HAVE_STRING_H
  20. #include <string.h>
  21. #endif
  22. #include <stdio.h>
  23. #ifdef HAVE_TIME_H
  24. #include <time.h>
  25. #endif
  26. #ifdef HAVE_STDLIB_H
  27. #include <stdlib.h> /* free */
  28. #endif
  29. #include <ctype.h>
  30. #include "types.h"
  31. #include "filegen.h"
  32. #include "memmem.h"
  33. #include "common.h"
/* Forward declarations of the file-local PDF handlers defined further down. */
/* Registers header_check_pdf for the pdf_header signature. */
static void register_header_check_pdf(file_stat_t *file_stat);
/* Returns 1 when buffer starts with pdf_header (filling file_recovery_new), 0 otherwise. */
static int header_check_pdf(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new);
/* File check used when no size could be read from the header: looks for the "%EOF" footer. */
static void file_check_pdf(file_recovery_t *file_recovery);
/* File check used when the header supplied calculated_file_size. */
static void file_check_pdf_and_size(file_recovery_t *file_recovery);
/* Sets file_recovery->time from an "xap:CreateDate" value found in the file. */
static void file_date_pdf(file_recovery_t *file_recovery);
/*
 * File-type descriptor for PDF documents.
 * header_check_pdf copies .extension into recovered files that are not
 * identified as Adobe Illustrator; .register_header_check points at the
 * function that registers the "%PDF-1" signature.
 * NOTE(review): the meaning of the remaining fields is defined by
 * file_hint_t in filegen.h, which is not visible here.
 */
const file_hint_t file_hint_pdf= {
.extension="pdf",
.description="Portable Document Format, Adobe Illustrator",
.min_header_distance=0,
.max_filesize=PHOTOREC_MAX_FILE_SIZE,
.recover=1,
.enable_by_default=1,
.register_header_check=&register_header_check_pdf
};
/* The six bytes "%PDF-1": registered as the signature and re-checked by header_check_pdf. */
static const unsigned char pdf_header[] = { '%','P','D','F','-','1'};
  49. static void register_header_check_pdf(file_stat_t *file_stat)
  50. {
  51. register_header_check(0, pdf_header,sizeof(pdf_header), &header_check_pdf, file_stat);
  52. }
  53. static int hex(int c)
  54. {
  55. if(c>='0' && c<='9')
  56. return c-'0';
  57. if(c>='A' && c<='F')
  58. return c-'A'+10;
  59. if(c>='a' && c<='f')
  60. return c-'a'+10;
  61. return -1;
  62. }
  63. static void file_rename_pdf(const char *old_filename)
  64. {
  65. char title[512];
  66. const unsigned char pattern[6]={ '/', 'T', 'i', 't', 'l', 'e' };
  67. uint64_t offset;
  68. FILE *handle;
  69. unsigned char*buffer;
  70. unsigned int i;
  71. unsigned int j;
  72. int bsize;
  73. const unsigned char utf16[3]= { 0xfe, 0xff, 0x00};
  74. if((handle=fopen(old_filename, "rb"))==NULL)
  75. return;
  76. if(fseek(handle, 0, SEEK_END)<0)
  77. {
  78. fclose(handle);
  79. return;
  80. }
  81. offset=ftell(handle);
  82. offset=file_rsearch(handle, offset, pattern, sizeof(pattern));
  83. if(offset==0)
  84. {
  85. fclose(handle);
  86. return;
  87. }
  88. offset+=sizeof(pattern);
  89. if(fseek(handle, offset, SEEK_SET)<0)
  90. {
  91. fclose(handle);
  92. return ;
  93. }
  94. buffer=(unsigned char*)MALLOC(512);
  95. if((bsize=fread(buffer, 1, 512, handle)) <= 0)
  96. {
  97. free(buffer);
  98. fclose(handle);
  99. return ;
  100. }
  101. /* Skip spaces after /Title */
  102. for(i=0; i<bsize && buffer[i]==' '; i++);
  103. if(i==bsize)
  104. {
  105. /* Too much spaces */
  106. free(buffer);
  107. fclose(handle);
  108. return ;
  109. }
  110. if(buffer[i]=='<')
  111. {
  112. int s=i;
  113. /* hexa to ascii */
  114. j=s;
  115. buffer[j++]='(';
  116. for(s++; s+1<bsize && buffer[s]!='>'; s+=2)
  117. buffer[j++]=(hex(buffer[s])<<4) | hex(buffer[s+1]);
  118. buffer[j]=')';
  119. }
  120. j=0;
  121. if(buffer[i]=='(')
  122. {
  123. i++; /* Skip '(' */
  124. if(i+8<bsize && memcmp(&buffer[i], "\\376\\377", 8)==0)
  125. {
  126. /* escape utf-16 title */
  127. i+=8;
  128. while(i<bsize)
  129. {
  130. if(buffer[i]==')')
  131. break;
  132. if(i+4<bsize && buffer[i]=='\\' && isdigit(buffer[i+1]) &&
  133. isdigit(buffer[i+2]) && isdigit(buffer[i+3]))
  134. i+=4;
  135. else
  136. title[j++]=buffer[i++];
  137. }
  138. }
  139. else if(i+3<bsize && memcmp(&buffer[i], &utf16, 3)==0)
  140. {
  141. /* utf-16 title */
  142. i+=2;
  143. while(i<bsize)
  144. {
  145. if(buffer[i]==')')
  146. break;
  147. title[j++]=buffer[i+1];
  148. i+=2;
  149. }
  150. }
  151. else
  152. {
  153. /* ascii title */
  154. while(i<bsize && buffer[i]!=')')
  155. title[j++]=buffer[i++];
  156. }
  157. }
  158. else
  159. {
  160. free(buffer);
  161. fclose(handle);
  162. return ;
  163. }
  164. /* Try to avoid some double-extensions */
  165. if(j>4 &&
  166. (memcmp(&title[j-4], ".doc", 4)==0 ||
  167. memcmp(&title[j-4], ".xls", 4)==0))
  168. j-=4;
  169. else if(j>5 &&
  170. (memcmp(&title[j-5], ".docx", 5)==0 ||
  171. memcmp(&title[j-5], ".xlsx", 5)==0))
  172. j-=5;
  173. file_rename(old_filename, title, j, 0, NULL, 1);
  174. free(buffer);
  175. fclose(handle);
  176. }
  177. static int header_check_pdf(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
  178. {
  179. if(memcmp(buffer,pdf_header,sizeof(pdf_header))==0)
  180. {
  181. const unsigned char sig_linearized[10]={'L','i','n','e','a','r','i','z','e','d'};
  182. const unsigned char *src;
  183. reset_file_recovery(file_recovery_new);
  184. if(td_memmem(buffer, buffer_size, "<</Illustrator ", 15) != NULL)
  185. file_recovery_new->extension="ai";
  186. else
  187. {
  188. file_recovery_new->extension=file_hint_pdf.extension;
  189. file_recovery_new->file_rename=&file_rename_pdf;
  190. }
  191. if((src=(const unsigned char *)td_memmem(buffer, 512, sig_linearized, sizeof(sig_linearized))) != NULL)
  192. {
  193. src+=sizeof(sig_linearized);
  194. for(; src<=buffer+512 && *src!='>'; src++)
  195. {
  196. if(*src=='/' && *(src+1)=='L')
  197. {
  198. src+=2;
  199. while(src<buffer+512 &&
  200. (*src==' ' || *src=='\t' || *src=='\n' || *src=='\r'))
  201. src++;
  202. file_recovery_new->calculated_file_size=0;
  203. while(src<buffer+512 &&
  204. *src>='0' && *src<='9')
  205. {
  206. file_recovery_new->calculated_file_size=file_recovery_new->calculated_file_size*10+(*src)-'0';
  207. src++;
  208. }
  209. file_recovery_new->data_check=&data_check_size;
  210. file_recovery_new->file_check=&file_check_pdf_and_size;
  211. return 1;
  212. }
  213. }
  214. }
  215. file_recovery_new->file_check=&file_check_pdf;
  216. return 1;
  217. }
  218. return 0;
  219. }
  220. static void file_check_pdf_and_size(file_recovery_t *file_recovery)
  221. {
  222. if(file_recovery->file_size>=file_recovery->calculated_file_size)
  223. {
  224. const unsigned int read_size=20;
  225. unsigned char buffer[20+3]; /* read_size+3 */
  226. int i;
  227. int taille;
  228. file_recovery->file_size=file_recovery->calculated_file_size;
  229. if(fseek(file_recovery->handle,file_recovery->file_size-read_size,SEEK_SET)<0)
  230. {
  231. file_recovery->file_size=0;
  232. return ;
  233. }
  234. taille=fread(buffer,1,read_size,file_recovery->handle);
  235. for(i=taille-4;i>=0;i--)
  236. {
  237. if(buffer[i]=='%' && buffer[i+1]=='E' && buffer[i+2]=='O' && buffer[i+3]=='F')
  238. {
  239. file_date_pdf(file_recovery);
  240. return ;
  241. }
  242. }
  243. }
  244. file_recovery->file_size=0;
  245. }
  246. static void file_check_pdf(file_recovery_t *file_recovery)
  247. {
  248. const unsigned char pdf_footer[4]= { '%', 'E', 'O', 'F'};
  249. file_search_footer(file_recovery, pdf_footer, sizeof(pdf_footer), 0);
  250. file_allow_nl(file_recovery, NL_BARENL|NL_CRLF|NL_BARECR);
  251. file_date_pdf(file_recovery);
  252. }
  253. static void file_date_pdf(file_recovery_t *file_recovery)
  254. {
  255. const unsigned char pattern[14]={'x', 'a', 'p', ':', 'C', 'r', 'e', 'a', 't', 'e', 'D', 'a', 't', 'e'};
  256. uint64_t offset=0;
  257. unsigned int j=0;
  258. unsigned char*buffer=(unsigned char*)MALLOC(4096);
  259. if(fseek(file_recovery->handle, 0, SEEK_SET)<0)
  260. {
  261. free(buffer);
  262. return ;
  263. }
  264. while(offset < file_recovery->file_size)
  265. {
  266. int i;
  267. int bsize;
  268. if((bsize=fread(buffer, 1, 4096, file_recovery->handle))<=0)
  269. {
  270. free(buffer);
  271. return ;
  272. }
  273. for(i=0; i<bsize; i++)
  274. {
  275. if(buffer[i]==pattern[j])
  276. {
  277. if(++j==sizeof(pattern))
  278. {
  279. const unsigned char *date_asc;
  280. struct tm tm_time;
  281. if(fseek(file_recovery->handle, offset+i+1, SEEK_SET)<0)
  282. {
  283. free(buffer);
  284. return ;
  285. }
  286. if(fread(buffer, 1, 22, file_recovery->handle) < 22)
  287. {
  288. free(buffer);
  289. return ;
  290. }
  291. if(buffer[0]=='=' && (buffer[1]=='\'' || buffer[1]=='"'))
  292. date_asc=&buffer[2];
  293. else if(buffer[i]=='>')
  294. date_asc=&buffer[1];
  295. else
  296. {
  297. free(buffer);
  298. return ;
  299. }
  300. /* */
  301. memset(&tm_time, 0, sizeof(tm_time));
  302. tm_time.tm_sec=(date_asc[17]-'0')*10+(date_asc[18]-'0'); /* seconds 0-59 */
  303. tm_time.tm_min=(date_asc[14]-'0')*10+(date_asc[15]-'0'); /* minutes 0-59 */
  304. tm_time.tm_hour=(date_asc[11]-'0')*10+(date_asc[12]-'0'); /* hours 0-23*/
  305. tm_time.tm_mday=(date_asc[8]-'0')*10+(date_asc[9]-'0'); /* day of the month 1-31 */
  306. tm_time.tm_mon=(date_asc[5]-'0')*10+(date_asc[6]-'0')-1; /* month 0-11 */
  307. tm_time.tm_year=(date_asc[0]-'0')*1000+(date_asc[1]-'0')*100+
  308. (date_asc[2]-'0')*10+(date_asc[3]-'0')-1900; /* year */
  309. tm_time.tm_isdst = -1; /* unknown daylight saving time */
  310. file_recovery->time=mktime(&tm_time);
  311. free(buffer);
  312. return ;
  313. }
  314. }
  315. else
  316. j=0;
  317. }
  318. offset+=bsize;
  319. }
  320. free(buffer);
  321. }