/src/zziplib/zzip/fseeko.c
C | 685 lines | 458 code | 48 blank | 179 comment | 112 complexity | d93b017247a53ff2529a54aaf6407560 MD5 | raw file

/*
 * NOTE: this is part of libzzipfseeko (i.e. it is not libzzip).
 *                                            ==================
 *
 * These routines are fully independent from the traditional zzip
 * implementation. They assume a readonly seekable stdio handle
 * representing a complete zip file. The functions show how to
 * parse the structure, find files and return a decoded bytestream.
 *
 * These routines are a bit simple and really here for documenting
 * the way to access a zip file. The complexity of zip access comes
 * from staggered reading of bytes and reposition of a filepointer in
 * a big archive with lots of files and long compressed datastreams.
 * Plus variants of drop-in stdio replacements, obfuscation routines,
 * auto fileextensions, drop-in dirent replacements, and so on...
 *
 * btw, we can _not_ use fgetpos/fsetpos since an fpos_t has no asserted
 * relation to a linear seek value as specified in zip info headers. In
 * general it is not a problem if your system has no fseeko/ftello pair
 * since we can fall back to fseek/ftell which limits the zip disk size
 * to 2GiB but the zip-storable seek values are 32bit limited anyway.
 *
 * Author:
 *      Guido Draheim <guidod@gmx.de>
 *
 * Copyright (c) 2003,2004 Guido Draheim
 *          All rights reserved,
 *          use under the restrictions of the
 *          Lesser GNU General Public License
 *          or alternatively the restrictions
 *          of the Mozilla Public License 1.1
 */

#define _LARGEFILE_SOURCE 1
#define _ZZIP_ENTRY_STRUCT 1

#include <zzip/fseeko.h>

#include <zzip/fetch.h>
#include <zzip/__mmap.h>
#include <zzip/__fnmatch.h>

#include <assert.h>
#include <stdlib.h>
#include <sys/stat.h>

#if defined ZZIP_HAVE_STRING_H
#include <string.h>
#elif defined ZZIP_HAVE_STRINGS_H
#include <strings.h>
#endif

#if defined ZZIP_HAVE_STDINT_H
#include <stdint.h>
#endif

#if __STDC_VERSION__+0 > 199900L
#define ___
#define ____
#else
#define ___ {
#define ____ }
#endif

#ifndef ZZIP_HAVE_FSEEKO
#define fseeko fseek
#define ftello ftell
#endif

/* note that the struct zzip_entry inherits the zzip_disk_entry values
 * and usually carries a copy of its values (in disk format!). To make the
 * following code more readable, we use a shorthand notation for the
 * upcast needed in C (not needed in C++) as "disk_(entry)".
 */
#ifdef __zzip_entry_extends_zzip_disk_entry
#define disk_(_entry_) _entry_
#else
#define disk_(_entry_) (& (_entry_)->head)
#endif

/* we try to round all seeks to the pagesize - since we do not use
 * the sys/mmap interface we have to guess a good value here: */
#define PAGESIZE 8192

/* ====================================================================== */

/*                      helper functions                                  */

/** => zzip_entry_data_offset
 * This function reads the corresponding struct zzip_file_header from
 * the zip disk of the given "entry". The returned off_t points to the
 * end of the file_header where the current fseek pointer has stopped.
 * This is used to immediately parse out any filename/extras block following
 * the file_header.
The return value is null on error. 96 */ 97static zzip_off_t 98zzip_entry_fread_file_header(ZZIP_ENTRY * entry, 99 struct zzip_file_header *file_header) 100{ 101 if (! entry || ! file_header) 102 return 0; 103 ___ zzip_off_t offset = zzip_disk_entry_fileoffset(disk_(entry)); 104 if (0 > offset || offset >= entry->disksize) 105 return 0; 106 107 if (fseeko(entry->diskfile, offset, SEEK_SET) == -1) return 0; 108 return (fread(file_header, sizeof(*file_header), 1, entry->diskfile) 109 ? offset + sizeof(*file_header) : 0); 110 ____; 111} 112 113/** helper functions for (fseeko) zip access api 114 * 115 * This functions returns the seekval offset of the data portion of the 116 * file referenced by the given zzip_entry. It requires an intermediate 117 * check of the file_header structure (i.e. it reads it from disk). After 118 * this call, the contained diskfile readposition is already set to the 119 * data_offset returned here. On error -1 is returned. 120 */ 121zzip_off_t 122zzip_entry_data_offset(ZZIP_ENTRY * entry) 123{ 124 struct zzip_file_header file_header; 125 if (! entry) 126 return -1; 127 ___ zzip_off_t offset = zzip_entry_fread_file_header(entry, &file_header); 128 if (! offset) 129 return -1; 130 offset += zzip_file_header_sizeof_tails(&file_header); 131 if (fseeko(entry->diskfile, offset, SEEK_SET) == -1) 132 return -1; 133 return offset; 134 ____; 135} 136 137/** => zzip_entry_data_offset 138 * This function is a big helper despite its little name: in a zip file the 139 * encoded filenames are usually NOT zero-terminated but for common usage 140 * with libc we need it that way. Secondly, the filename SHOULD be present 141 * in the zip central directory but if not then we fallback to the filename 142 * given in the file_header of each compressed data portion. 143 */ 144zzip__new__ char * 145zzip_entry_strdup_name(ZZIP_ENTRY * entry) 146{ 147 if (! 
entry) 148 return 0; 149 150 ___ zzip_size_t len; 151 if ((len = zzip_disk_entry_namlen(disk_(entry)))) 152 { 153 char *name = malloc(len + 1); 154 if (! name) 155 return 0; 156 memcpy(name, entry->tail, len); 157 name[len] = '\0'; 158 return name; 159 } 160 ___ auto struct zzip_file_header header; 161 if (zzip_entry_fread_file_header(entry, &header) 162 && (len = zzip_file_header_namlen(&header))) 163 { 164 char *name = malloc(len + 1); 165 if (! name) { 166 return 0; 167 } else { 168 zzip_size_t n = fread(name, 1, len, entry->diskfile); 169 if (n != len) { 170 free (name); 171 return 0; 172 } 173 name[n] = '\0'; 174 return name; 175 } 176 } 177 return 0; 178 ____; 179 ____; 180} 181 182static int 183prescan_entry(ZZIP_ENTRY * entry) 184{ 185 assert(entry); 186 ___ zzip_off_t tailsize = zzip_disk_entry_sizeof_tails(disk_(entry)); 187 if (tailsize + 1 > entry->tailalloc) 188 { 189 char *newtail = realloc(entry->tail, tailsize + 1); 190 if (! newtail) 191 return ENOMEM; 192 entry->tail = newtail; 193 entry->tailalloc = tailsize + 1; 194 } 195# ifdef SIZE_MAX /* from stdint.h */ 196 if (tailsize > (zzip_off_t)(SIZE_MAX)) { return EFBIG; } 197# endif 198 ___ zzip_size_t readsize = fread(entry->tail, 1, tailsize, entry->diskfile); 199 /* name + comment + extras */ 200 if ((zzip_off_t)readsize != tailsize) { 201 return errno; 202 } else { 203 return 0; 204 } ____; ____; 205} 206 207static void 208prescan_clear(ZZIP_ENTRY * entry) 209{ 210 assert(entry); 211 if (entry->tail) 212 free(entry->tail); 213 entry->tail = 0; 214 entry->tailalloc = 0; 215} 216 217/* ====================================================================== */ 218 219/** => zzip_entry_findfile 220 * 221 * This function is the first call of all the zip access functions here. 222 * It contains the code to find the first entry of the zip central directory. 
223 * Here we require the stdio handle to represent a real zip file where the 224 * disk_trailer is _last_ in the file area, so that its position would be at 225 * a fixed offset from the end of the file area if not for the comment field 226 * allowed to be of variable length (which needs us to do a little search 227 * for the disk_tailer). However, in this simple implementation we disregard 228 * any disk_trailer info telling about multidisk archives, so we just return 229 * a pointer to the first entry in the zip central directory of that file. 230 * 231 * For an actual means, we are going to search backwards from the end 232 * of the mmaped block looking for the PK-magic signature of a 233 * disk_trailer. If we see one then we check the rootseek value to 234 * find the first disk_entry of the root central directory. If we find 235 * the correct PK-magic signature of a disk_entry over there then we 236 * assume we are done and we are going to return a pointer to that label. 237 * 238 * The return value is a pointer to the first zzip_disk_entry being checked 239 * to be within the bounds of the file area specified by the arguments. If 240 * no disk_trailer was found then null is returned, and likewise we only 241 * accept a disk_trailer with a seekvalue that points to a disk_entry and 242 * both parts have valid PK-magic parts. Beyond some sanity check we try to 243 * catch a common brokeness with zip archives that still allows us to find 244 * the start of the zip central directory. 245 */ 246zzip__new__ ZZIP_ENTRY * 247zzip_entry_findfirst(FILE * disk) 248{ 249 if (! disk) 250 return 0; 251 if (fseeko(disk, 0, SEEK_END) == -1) 252 return 0; 253 ___ zzip_off_t disksize = ftello(disk); 254 if (disksize < (zzip_off_t) sizeof(struct zzip_disk_trailer)) 255 return 0; 256 /* we read out chunks of 8 KiB in the hope to match disk granularity */ 257 ___ zzip_off_t pagesize = PAGESIZE; /* getpagesize() */ 258 ___ ZZIP_ENTRY *entry = malloc(sizeof(*entry)); 259 if (! 
entry) 260 return 0; 261 ___ unsigned char *buffer = malloc(pagesize); 262 if (! buffer) 263 goto nomem; 264 265 assert(pagesize / 2 > (zzip_off_t) sizeof(struct zzip_disk_trailer)); 266 /* at each step, we will fread a pagesize block which overlaps with the 267 * previous read by means of pagesize/2 step at the end of the while(1) */ 268 ___ zzip_off_t mapoffs = disksize & ~(pagesize - 1); 269 ___ zzip_off_t mapsize = disksize - mapoffs; 270 if (mapoffs && mapsize < pagesize / 2) 271 { 272 mapoffs -= pagesize / 2; 273 mapsize += pagesize / 2; 274 } 275 assert(mapsize < 3*8192); 276 while (1) 277 { 278 if (fseeko(disk, mapoffs, SEEK_SET) == -1) 279 goto error; 280 if (fread(buffer, 1, mapsize, disk) != (zzip_size_t)mapsize) 281 goto error; 282 ___ unsigned char *p = 283 buffer + mapsize - sizeof(struct zzip_disk_trailer); 284 for (; p >= buffer; p--) 285 { 286 zzip_off_t root; /* (struct zzip_disk_entry*) */ 287 if (zzip_disk_trailer_check_magic(p)) 288 { 289 root = zzip_disk_trailer_rootseek((struct zzip_disk_trailer *) 290 p); 291 if (root > disksize - (long) sizeof(struct zzip_disk_trailer)) 292 { 293 /* first disk_entry is after the disk_trailer? can't be! 
*/ 294 struct zzip_disk_trailer *trailer = 295 (struct zzip_disk_trailer *) p; 296 zzip_off_t rootsize = zzip_disk_trailer_rootsize(trailer); 297 if (rootsize > mapoffs) 298 continue; 299 /* a common brokeness that can be fixed: we just assume the 300 * central directory was written directly before : */ 301 root = mapoffs - rootsize; 302 } 303 } else if (zzip_disk64_trailer_check_magic(p)) 304 { 305 struct zzip_disk64_trailer *trailer = 306 (struct zzip_disk64_trailer *) p; 307 if (sizeof(zzip_off_t) < 8) 308 return 0; 309 root = zzip_disk64_trailer_rootseek(trailer); 310 } else 311 continue; 312 313 assert(0 <= root && root < mapsize); 314 if (fseeko(disk, root, SEEK_SET) == -1) 315 goto error; 316 if (fread(disk_(entry), 1, sizeof(*disk_(entry)), disk) 317 != sizeof(*disk_(entry))) goto error; 318 if (zzip_disk_entry_check_magic(entry)) 319 { 320 free(buffer); 321 entry->headseek = root; 322 entry->diskfile = disk; 323 entry->disksize = disksize; 324 if (prescan_entry(entry)) 325 goto nomem; 326 return entry; 327 } 328 } 329 ____; 330 if (! mapoffs) 331 break; 332 assert(mapsize >= pagesize / 2); 333 mapoffs -= pagesize / 2; /* mapsize += pagesize/2; */ 334 mapsize = pagesize; /* if (mapsize > pagesize) ... */ 335 if (disksize - mapoffs > 64 * 1024) 336 break; 337 } 338 error: 339 free(buffer); 340 nomem: 341 free(entry); 342 ____; 343 ____; 344 ____; 345 ____; 346 ____; 347 ____; 348 return 0; 349} 350 351/** => zzip_entry_findfile 352 * 353 * This function takes an existing "entry" in the central root directory 354 * (e.g. from zzip_entry_findfirst) and moves it to point to the next entry. 355 * On error it returns 0, otherwise the old entry. If no further match is 356 * found then null is returned and the entry already free()d. If you want 357 * to stop searching for matches before that case then please call 358 * => zzip_entry_free on the cursor struct ZZIP_ENTRY. 
359 */ 360zzip__new__ ZZIP_ENTRY * 361zzip_entry_findnext(ZZIP_ENTRY * _zzip_restrict entry) 362{ 363 if (! entry) 364 return entry; 365 if (! zzip_disk_entry_check_magic(entry)) 366 goto err; 367 ___ zzip_off_t seek = 368 entry->headseek + zzip_disk_entry_sizeto_end(disk_(entry)); 369 if (seek + (zzip_off_t) sizeof(*disk_(entry)) > entry->disksize) 370 goto err; 371 372 if (fseeko(entry->diskfile, seek, SEEK_SET) == -1) 373 goto err; 374 if (fread(disk_(entry), 1, sizeof(*disk_(entry)), entry->diskfile) 375 != sizeof(*disk_(entry))) goto err; 376 entry->headseek = seek; 377 if (! zzip_disk_entry_check_magic(entry)) 378 goto err; 379 if (prescan_entry(entry)) 380 goto err; 381 return entry; 382 err: 383 zzip_entry_free(entry); 384 return 0; 385 ____; 386} 387 388/** => zzip_entry_findfile 389 * this function releases the malloc()ed areas needed for zzip_entry, the 390 * pointer is invalid afterwards. This function has #define synonyms of 391 * zzip_entry_findlast(), zzip_entry_findlastfile(), zzip_entry_findlastmatch() 392 */ 393int 394zzip_entry_free(ZZIP_ENTRY * entry) 395{ 396 if (! entry) 397 return 0; 398 prescan_clear(entry); 399 free(entry); 400 return 1; 401} 402 403/** search for files in the (fseeko) zip central directory 404 * 405 * This function is given a filename as an additional argument, to find the 406 * disk_entry matching a given filename. The compare-function is usually 407 * strcmp or strcasecmp or perhaps strcoll, if null then strcmp is used. 408 * - use null as argument for "old"-entry when searching the first 409 * matching entry, otherwise the last returned value if you look for other 410 * entries with a special "compare" function (if null then a doubled search 411 * is rather useless with this variant of _findfile). If no further entry is 412 * found then null is returned and any "old"-entry gets already free()d. 
413 */ 414zzip__new__ ZZIP_ENTRY * 415zzip_entry_findfile(FILE * disk, char *filename, 416 ZZIP_ENTRY * _zzip_restrict entry, zzip_strcmp_fn_t compare) 417{ 418 if (! filename || ! disk) 419 return 0; 420 if (! entry) 421 entry = zzip_entry_findfirst(disk); 422 else 423 entry = zzip_entry_findnext(entry); 424 425 if (! compare) 426 compare = (zzip_strcmp_fn_t) (strcmp); 427 428 for (; entry; entry = zzip_entry_findnext(entry)) 429 { 430 /* filenames within zip files are often not null-terminated! */ 431 char *realname = zzip_entry_strdup_name(entry); 432 if (! realname) 433 continue; 434 if (! compare(filename, realname)) 435 { 436 free(realname); 437 return entry; 438 } else 439 { 440 free(realname); 441 continue; 442 } 443 } 444 return 0; 445} 446 447/** => zzip_entry_findfile 448 * 449 * This function uses a compare-function with an additional argument 450 * and it is called just like fnmatch(3) from POSIX.2 AD:1993), i.e. 451 * the argument filespec first and the ziplocal filename second with 452 * the integer-flags put in as third to the indirect call. If the 453 * platform has fnmatch available then null-compare will use that one 454 * and otherwise we fall back to mere strcmp, so if you need fnmatch 455 * searching then please provide an implementation somewhere else. 456 * - use null as argument for "after"-entry when searching the first 457 * matching entry, or the last disk_entry return-value to find the 458 * next entry matching the given filespec. If no further entry is 459 * found then null is returned and any "old"-entry gets already free()d. 460 */ 461zzip__new__ ZZIP_ENTRY * 462zzip_entry_findmatch(FILE * disk, char *filespec, 463 ZZIP_ENTRY * _zzip_restrict entry, 464 zzip_fnmatch_fn_t compare, int flags) 465{ 466 if (! filespec || ! disk) 467 return 0; 468 if (! entry) 469 entry = zzip_entry_findfirst(disk); 470 else 471 entry = zzip_entry_findnext(entry); 472 473 if (! 
compare) 474 compare = (zzip_fnmatch_fn_t) _zzip_fnmatch; 475 476 for (; entry; entry = zzip_entry_findnext(entry)) 477 { 478 /* filenames within zip files are often not null-terminated! */ 479 char *realname = zzip_entry_strdup_name(entry); 480 if (! realname) 481 continue; 482 if (! compare(filespec, realname, flags)) 483 { 484 free(realname); 485 return entry; 486 } else 487 { 488 free(realname); 489 continue; 490 } 491 } 492 return 0; 493} 494 495/* ====================================================================== */ 496 497/** 498 * typedef struct zzip_disk_file ZZIP_ENTRY_FILE; 499 */ 500struct zzip_entry_file /* : zzip_file_header */ 501{ 502 struct zzip_file_header header; /* fopen detected header */ 503 ZZIP_ENTRY *entry; /* fopen entry */ 504 zzip_off_t data; /* for stored blocks */ 505 zzip_size_t avail; /* memorized for checks on EOF */ 506 zzip_size_t compressed; /* compressed flag and datasize */ 507 zzip_size_t dataoff; /* offset from data start */ 508 z_stream zlib; /* for inflated blocks */ 509 unsigned char buffer[PAGESIZE]; /* work buffer for inflate algorithm */ 510}; 511 512/** open a file within a zip disk for reading 513 * 514 * This function does take an "entry" argument and copies it (or just takes 515 * it over as owner) to a new ZZIP_ENTRY_FILE handle structure. That 516 * structure contains also a zlib buffer for decoding. This function does 517 * seek to the file_header of the given "entry" and validates it for the 518 * data buffer following it. We do also prefetch some data from the data 519 * buffer thereby trying to match the disk pagesize for faster access later. 520 * The => zzip_entry_fread will then read in chunks of pagesizes which is 521 * the size of the internal readahead buffer. If an error occurs then null 522 * is returned. 523 */ 524zzip__new__ ZZIP_ENTRY_FILE * 525zzip_entry_fopen(ZZIP_ENTRY * entry, int takeover) 526{ 527 if (! entry) 528 return 0; 529 if (! 
takeover) 530 { 531 ZZIP_ENTRY *found = malloc(sizeof(*entry)); 532 if (! found) 533 return 0; 534 memcpy(found, entry, sizeof(*entry)); /* prescan_copy */ 535 found->tail = malloc(found->tailalloc); 536 if (! found->tail) 537 { free (found); return 0; } 538 memcpy(found->tail, entry->tail, entry->tailalloc); 539 entry = found; 540 } 541 ___ ZZIP_ENTRY_FILE *file = malloc(sizeof(*file)); 542 if (! file) 543 goto fail1; 544 file->entry = entry; 545 if (! zzip_entry_fread_file_header(entry, &file->header)) 546 goto fail2; 547 file->avail = zzip_file_header_usize(&file->header); 548 file->data = zzip_entry_data_offset(entry); 549 file->dataoff = 0; 550 551 if (! file->avail || zzip_file_header_data_stored(&file->header)) 552 { file->compressed = 0; return file; } 553 554 file->compressed = zzip_file_header_csize(&file->header); 555 file->zlib.opaque = 0; 556 file->zlib.zalloc = Z_NULL; 557 file->zlib.zfree = Z_NULL; 558 559 ___ zzip_off_t seek = file->data; 560 seek += sizeof(file->buffer); 561 seek -= seek & (sizeof(file->buffer) - 1); 562 assert(file->data < seek); /* pre-read to next PAGESIZE boundary... */ 563 if (fseeko(file->entry->diskfile, file->data + file->dataoff, SEEK_SET) == -1) 564 goto fail2; 565 file->zlib.next_in = file->buffer; 566 file->zlib.avail_in = fread(file->buffer, 1, seek - file->data, 567 file->entry->diskfile); 568 file->dataoff += file->zlib.avail_in; 569 ____; 570 571 if (! 
zzip_file_header_data_deflated(&file->header) 572 || inflateInit2(&file->zlib, -MAX_WBITS) != Z_OK) 573 goto fail2; 574 575 return file; 576 fail2: 577 free(file); 578 fail1: 579 zzip_entry_free(entry); 580 return 0; 581 ____; 582} 583 584/** => zzip_entry_fopen 585 * 586 * This function opens a file found by name, so it does a search into 587 * the zip central directory with => zzip_entry_findfile and whatever 588 * is found first is given to => zzip_entry_fopen 589 */ 590zzip__new__ ZZIP_ENTRY_FILE * 591zzip_entry_ffile(FILE * disk, char *filename) 592{ 593 ZZIP_ENTRY *entry = zzip_entry_findfile(disk, filename, 0, 0); 594 if (! entry) 595 return 0; 596 return zzip_entry_fopen(entry, 1); 597} 598 599 600/** => zzip_entry_fopen 601 * 602 * This function reads more bytes into the output buffer specified as 603 * arguments. The return value is null on eof or error, the stdio-like 604 * interface can not distinguish between these so you need to check 605 * with => zzip_entry_feof for the difference. 606 */ 607zzip_size_t 608zzip_entry_fread(void *ptr, zzip_size_t sized, zzip_size_t nmemb, 609 ZZIP_ENTRY_FILE * file) 610{ 611 if (! file) 612 return 0; 613 ___ zzip_size_t size = sized * nmemb; 614 if (! file->compressed) 615 { 616 if (size > file->avail) 617 size = file->avail; 618 if (fread(ptr, 1, size, file->entry->diskfile) != size) return 0; 619 file->dataoff += size; 620 file->avail -= size; 621 return size; 622 } 623 624 file->zlib.avail_out = size; 625 file->zlib.next_out = ptr; 626 ___ zzip_size_t total_old = file->zlib.total_out; 627 while (1) 628 { 629 if (! file->zlib.avail_in) 630 { 631 size = file->compressed - file->dataoff; 632 if (size > sizeof(file->buffer)) 633 size = sizeof(file->buffer); 634 /* fseek (file->data + file->dataoff, file->entry->diskfile); */ 635 file->zlib.avail_in = fread(file->buffer, 1, size, 636 file->entry->diskfile); 637 file->zlib.next_in = file->buffer; 638 file->dataoff += file->zlib.avail_in; 639 } 640 if (! 
file->zlib.avail_in) 641 return 0; 642 643 ___ int err = inflate(&file->zlib, Z_NO_FLUSH); 644 if (err == Z_STREAM_END) 645 file->avail = 0; 646 else if (err == Z_OK) 647 file->avail -= file->zlib.total_out - total_old; 648 else 649 return 0; 650 ____; 651 if (file->zlib.avail_out && ! file->zlib.avail_in) 652 continue; 653 return file->zlib.total_out - total_old; 654 } 655 ____; 656 ____; 657} 658 659/** => zzip_entry_fopen 660 * This function releases any zlib decoder info needed for decompression 661 * and dumps the ZZIP_ENTRY_FILE struct then. 662 */ 663int 664zzip_entry_fclose(ZZIP_ENTRY_FILE * file) 665{ 666 if (! file) 667 return 0; 668 if (file->compressed) 669 inflateEnd(&file->zlib); 670 zzip_entry_free(file->entry); 671 free(file); 672 return 0; 673} 674 675/** => zzip_entry_fopen 676 * 677 * This function allows to distinguish an error from an eof condition. 678 * Actually, if we found an error but we did already reach eof then we 679 * just keep on saying that it was an eof, so the app can just continue. 680 */ 681int 682zzip_entry_feof(ZZIP_ENTRY_FILE * file) 683{ 684 return ! file || ! file->avail; 685}