/contrib/groff/src/utils/indxbib/indxbib.cpp

https://bitbucket.org/freebsd/freebsd-head/ · C++ · 790 lines · 711 code · 44 blank · 35 comment · 193 complexity · c7b5d56cd121968c7c178347d043e526 MD5 · raw file

  1. // -*- C++ -*-
  2. /* Copyright (C) 1989-1992, 2000, 2001, 2002, 2003, 2004
  3. Free Software Foundation, Inc.
  4. Written by James Clark (jjc@jclark.com)
  5. This file is part of groff.
  6. groff is free software; you can redistribute it and/or modify it under
  7. the terms of the GNU General Public License as published by the Free
  8. Software Foundation; either version 2, or (at your option) any later
  9. version.
  10. groff is distributed in the hope that it will be useful, but WITHOUT ANY
  11. WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12. FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
  13. for more details.
  14. You should have received a copy of the GNU General Public License along
  15. with groff; see the file COPYING. If not, write to the Free Software
  16. Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
  17. #include "lib.h"
  18. #include <stdlib.h>
  19. #include <assert.h>
  20. #include <errno.h>
  21. #include "posix.h"
  22. #include "errarg.h"
  23. #include "error.h"
  24. #include "stringclass.h"
  25. #include "cset.h"
  26. #include "cmap.h"
  27. #include "defs.h"
  28. #include "index.h"
  29. #include "nonposix.h"
  30. extern "C" const char *Version_string;
  31. #define DEFAULT_HASH_TABLE_SIZE 997
  32. #define TEMP_INDEX_TEMPLATE "indxbibXXXXXX"
  33. // (2^n - MALLOC_OVERHEAD) should be a good argument for malloc().
  34. #define MALLOC_OVERHEAD 16
  35. #ifdef BLOCK_SIZE
  36. #undef BLOCK_SIZE
  37. #endif
  38. const int BLOCK_SIZE = ((1024 - MALLOC_OVERHEAD - sizeof(struct block *)
  39. - sizeof(int)) / sizeof(int));
  40. struct block {
  41. block *next;
  42. int used;
  43. int v[BLOCK_SIZE];
  44. block(block *p = 0) : next(p), used(0) { }
  45. };
  46. struct block;
  47. union table_entry {
  48. block *ptr;
  49. int count;
  50. };
  51. struct word_list {
  52. word_list *next;
  53. char *str;
  54. int len;
  55. word_list(const char *, int, word_list *);
  56. };
  57. table_entry *hash_table;
  58. int hash_table_size = DEFAULT_HASH_TABLE_SIZE;
  59. // We make this the same size as hash_table so we only have to do one
  60. // mod per key.
  61. static word_list **common_words_table = 0;
  62. char *key_buffer;
  63. FILE *indxfp;
  64. int ntags = 0;
  65. string filenames;
  66. char *temp_index_file = 0;
  67. const char *ignore_fields = "XYZ";
  68. const char *common_words_file = COMMON_WORDS_FILE;
  69. int n_ignore_words = 100;
  70. int truncate_len = 6;
  71. int shortest_len = 3;
  72. int max_keys_per_item = 100;
  73. static void usage(FILE *stream);
  74. static void write_hash_table();
  75. static void init_hash_table();
  76. static void read_common_words_file();
  77. static int store_key(char *s, int len);
  78. static void possibly_store_key(char *s, int len);
  79. static int do_whole_file(const char *filename);
  80. static int do_file(const char *filename);
  81. static void store_reference(int filename_index, int pos, int len);
  82. static void check_integer_arg(char opt, const char *arg, int min, int *res);
  83. static void store_filename(const char *);
  84. static void fwrite_or_die(const void *ptr, int size, int nitems, FILE *fp);
  85. static char *get_cwd();
  86. extern "C" {
  87. void cleanup();
  88. void catch_fatal_signals();
  89. void ignore_fatal_signals();
  90. }
  91. int main(int argc, char **argv)
  92. {
  93. program_name = argv[0];
  94. static char stderr_buf[BUFSIZ];
  95. setbuf(stderr, stderr_buf);
  96. const char *base_name = 0;
  97. typedef int (*parser_t)(const char *);
  98. parser_t parser = do_file;
  99. const char *directory = 0;
  100. const char *foption = 0;
  101. int opt;
  102. static const struct option long_options[] = {
  103. { "help", no_argument, 0, CHAR_MAX + 1 },
  104. { "version", no_argument, 0, 'v' },
  105. { NULL, 0, 0, 0 }
  106. };
  107. while ((opt = getopt_long(argc, argv, "c:o:h:i:k:l:t:n:c:d:f:vw",
  108. long_options, NULL))
  109. != EOF)
  110. switch (opt) {
  111. case 'c':
  112. common_words_file = optarg;
  113. break;
  114. case 'd':
  115. directory = optarg;
  116. break;
  117. case 'f':
  118. foption = optarg;
  119. break;
  120. case 'h':
  121. check_integer_arg('h', optarg, 1, &hash_table_size);
  122. if (!is_prime(hash_table_size)) {
  123. while (!is_prime(++hash_table_size))
  124. ;
  125. warning("%1 not prime: using %2 instead", optarg, hash_table_size);
  126. }
  127. break;
  128. case 'i':
  129. ignore_fields = optarg;
  130. break;
  131. case 'k':
  132. check_integer_arg('k', optarg, 1, &max_keys_per_item);
  133. break;
  134. case 'l':
  135. check_integer_arg('l', optarg, 0, &shortest_len);
  136. break;
  137. case 'n':
  138. check_integer_arg('n', optarg, 0, &n_ignore_words);
  139. break;
  140. case 'o':
  141. base_name = optarg;
  142. break;
  143. case 't':
  144. check_integer_arg('t', optarg, 1, &truncate_len);
  145. break;
  146. case 'w':
  147. parser = do_whole_file;
  148. break;
  149. case 'v':
  150. printf("GNU indxbib (groff) version %s\n", Version_string);
  151. exit(0);
  152. break;
  153. case CHAR_MAX + 1: // --help
  154. usage(stdout);
  155. exit(0);
  156. break;
  157. case '?':
  158. usage(stderr);
  159. exit(1);
  160. break;
  161. default:
  162. assert(0);
  163. break;
  164. }
  165. if (optind >= argc && foption == 0)
  166. fatal("no files and no -f option");
  167. if (!directory) {
  168. char *path = get_cwd();
  169. store_filename(path);
  170. a_delete path;
  171. }
  172. else
  173. store_filename(directory);
  174. init_hash_table();
  175. store_filename(common_words_file);
  176. store_filename(ignore_fields);
  177. key_buffer = new char[truncate_len];
  178. read_common_words_file();
  179. if (!base_name)
  180. base_name = optind < argc ? argv[optind] : DEFAULT_INDEX_NAME;
  181. const char *p = strrchr(base_name, DIR_SEPS[0]), *p1;
  182. const char *sep = &DIR_SEPS[1];
  183. while (*sep) {
  184. p1 = strrchr(base_name, *sep);
  185. if (p1 && (!p || p1 > p))
  186. p = p1;
  187. sep++;
  188. }
  189. size_t name_max;
  190. if (p) {
  191. char *dir = strsave(base_name);
  192. dir[p - base_name] = '\0';
  193. name_max = file_name_max(dir);
  194. a_delete dir;
  195. }
  196. else
  197. name_max = file_name_max(".");
  198. const char *filename = p ? p + 1 : base_name;
  199. if (strlen(filename) + sizeof(INDEX_SUFFIX) - 1 > name_max)
  200. fatal("`%1.%2' is too long for a filename", filename, INDEX_SUFFIX);
  201. if (p) {
  202. p++;
  203. temp_index_file = new char[p - base_name + sizeof(TEMP_INDEX_TEMPLATE)];
  204. memcpy(temp_index_file, base_name, p - base_name);
  205. strcpy(temp_index_file + (p - base_name), TEMP_INDEX_TEMPLATE);
  206. }
  207. else {
  208. temp_index_file = strsave(TEMP_INDEX_TEMPLATE);
  209. }
  210. catch_fatal_signals();
  211. int fd = mkstemp(temp_index_file);
  212. if (fd < 0)
  213. fatal("can't create temporary index file: %1", strerror(errno));
  214. indxfp = fdopen(fd, FOPEN_WB);
  215. if (indxfp == 0)
  216. fatal("fdopen failed");
  217. if (fseek(indxfp, sizeof(index_header), 0) < 0)
  218. fatal("can't seek past index header: %1", strerror(errno));
  219. int failed = 0;
  220. if (foption) {
  221. FILE *fp = stdin;
  222. if (strcmp(foption, "-") != 0) {
  223. errno = 0;
  224. fp = fopen(foption, "r");
  225. if (!fp)
  226. fatal("can't open `%1': %2", foption, strerror(errno));
  227. }
  228. string path;
  229. int lineno = 1;
  230. for (;;) {
  231. int c;
  232. for (c = getc(fp); c != '\n' && c != EOF; c = getc(fp)) {
  233. if (c == '\0')
  234. error_with_file_and_line(foption, lineno,
  235. "nul character in pathname ignored");
  236. else
  237. path += c;
  238. }
  239. if (path.length() > 0) {
  240. path += '\0';
  241. if (!(*parser)(path.contents()))
  242. failed = 1;
  243. path.clear();
  244. }
  245. if (c == EOF)
  246. break;
  247. lineno++;
  248. }
  249. if (fp != stdin)
  250. fclose(fp);
  251. }
  252. for (int i = optind; i < argc; i++)
  253. if (!(*parser)(argv[i]))
  254. failed = 1;
  255. write_hash_table();
  256. if (fclose(indxfp) < 0)
  257. fatal("error closing temporary index file: %1", strerror(errno));
  258. char *index_file = new char[strlen(base_name) + sizeof(INDEX_SUFFIX)];
  259. strcpy(index_file, base_name);
  260. strcat(index_file, INDEX_SUFFIX);
  261. #ifdef HAVE_RENAME
  262. #ifdef __EMX__
  263. if (access(index_file, R_OK) == 0)
  264. unlink(index_file);
  265. #endif /* __EMX__ */
  266. if (rename(temp_index_file, index_file) < 0) {
  267. #ifdef __MSDOS__
  268. // RENAME could fail on plain MSDOS filesystems because
  269. // INDEX_FILE is an invalid filename, e.g. it has multiple dots.
  270. char *fname = p ? index_file + (p - base_name) : 0;
  271. char *dot = 0;
  272. // Replace the dot with an underscore and try again.
  273. if (fname
  274. && (dot = strchr(fname, '.')) != 0
  275. && strcmp(dot, INDEX_SUFFIX) != 0)
  276. *dot = '_';
  277. if (rename(temp_index_file, index_file) < 0)
  278. #endif
  279. fatal("can't rename temporary index file: %1", strerror(errno));
  280. }
  281. #else /* not HAVE_RENAME */
  282. ignore_fatal_signals();
  283. if (unlink(index_file) < 0) {
  284. if (errno != ENOENT)
  285. fatal("can't unlink `%1': %2", index_file, strerror(errno));
  286. }
  287. if (link(temp_index_file, index_file) < 0)
  288. fatal("can't link temporary index file: %1", strerror(errno));
  289. if (unlink(temp_index_file) < 0)
  290. fatal("can't unlink temporary index file: %1", strerror(errno));
  291. #endif /* not HAVE_RENAME */
  292. temp_index_file = 0;
  293. return failed;
  294. }
  295. static void usage(FILE *stream)
  296. {
  297. fprintf(stream,
  298. "usage: %s [-vw] [-c file] [-d dir] [-f file] [-h n] [-i XYZ] [-k n]\n"
  299. " [-l n] [-n n] [-o base] [-t n] [files...]\n",
  300. program_name);
  301. }
  302. static void check_integer_arg(char opt, const char *arg, int min, int *res)
  303. {
  304. char *ptr;
  305. long n = strtol(arg, &ptr, 10);
  306. if (n == 0 && ptr == arg)
  307. error("argument to -%1 not an integer", opt);
  308. else if (n < min)
  309. error("argument to -%1 must not be less than %2", opt, min);
  310. else {
  311. if (n > INT_MAX)
  312. error("argument to -%1 greater than maximum integer", opt);
  313. else if (*ptr != '\0')
  314. error("junk after integer argument to -%1", opt);
  315. *res = int(n);
  316. }
  317. }
  318. static char *get_cwd()
  319. {
  320. char *buf;
  321. int size = 12;
  322. for (;;) {
  323. buf = new char[size];
  324. if (getcwd(buf, size))
  325. break;
  326. if (errno != ERANGE)
  327. fatal("cannot get current working directory: %1", strerror(errno));
  328. a_delete buf;
  329. if (size == INT_MAX)
  330. fatal("current working directory longer than INT_MAX");
  331. if (size > INT_MAX/2)
  332. size = INT_MAX;
  333. else
  334. size *= 2;
  335. }
  336. return buf;
  337. }
  338. word_list::word_list(const char *s, int n, word_list *p)
  339. : next(p), len(n)
  340. {
  341. str = new char[n];
  342. memcpy(str, s, n);
  343. }
  344. static void read_common_words_file()
  345. {
  346. if (n_ignore_words <= 0)
  347. return;
  348. errno = 0;
  349. FILE *fp = fopen(common_words_file, "r");
  350. if (!fp)
  351. fatal("can't open `%1': %2", common_words_file, strerror(errno));
  352. common_words_table = new word_list * [hash_table_size];
  353. for (int i = 0; i < hash_table_size; i++)
  354. common_words_table[i] = 0;
  355. int count = 0;
  356. int key_len = 0;
  357. for (;;) {
  358. int c = getc(fp);
  359. while (c != EOF && !csalnum(c))
  360. c = getc(fp);
  361. if (c == EOF)
  362. break;
  363. do {
  364. if (key_len < truncate_len)
  365. key_buffer[key_len++] = cmlower(c);
  366. c = getc(fp);
  367. } while (c != EOF && csalnum(c));
  368. if (key_len >= shortest_len) {
  369. int h = hash(key_buffer, key_len) % hash_table_size;
  370. common_words_table[h] = new word_list(key_buffer, key_len,
  371. common_words_table[h]);
  372. }
  373. if (++count >= n_ignore_words)
  374. break;
  375. key_len = 0;
  376. if (c == EOF)
  377. break;
  378. }
  379. n_ignore_words = count;
  380. fclose(fp);
  381. }
  382. static int do_whole_file(const char *filename)
  383. {
  384. errno = 0;
  385. FILE *fp = fopen(filename, "r");
  386. if (!fp) {
  387. error("can't open `%1': %2", filename, strerror(errno));
  388. return 0;
  389. }
  390. int count = 0;
  391. int key_len = 0;
  392. int c;
  393. while ((c = getc(fp)) != EOF) {
  394. if (csalnum(c)) {
  395. key_len = 1;
  396. key_buffer[0] = c;
  397. while ((c = getc(fp)) != EOF) {
  398. if (!csalnum(c))
  399. break;
  400. if (key_len < truncate_len)
  401. key_buffer[key_len++] = c;
  402. }
  403. if (store_key(key_buffer, key_len)) {
  404. if (++count >= max_keys_per_item)
  405. break;
  406. }
  407. if (c == EOF)
  408. break;
  409. }
  410. }
  411. store_reference(filenames.length(), 0, 0);
  412. store_filename(filename);
  413. fclose(fp);
  414. return 1;
  415. }
  416. static int do_file(const char *filename)
  417. {
  418. errno = 0;
  419. // Need binary I/O for MS-DOS/MS-Windows, because indxbib relies on
  420. // byte counts to be consistent with fseek.
  421. FILE *fp = fopen(filename, FOPEN_RB);
  422. if (fp == 0) {
  423. error("can't open `%1': %2", filename, strerror(errno));
  424. return 0;
  425. }
  426. int filename_index = filenames.length();
  427. store_filename(filename);
  428. enum {
  429. START, // at the start of the file; also in between references
  430. BOL, // in the middle of a reference, at the beginning of the line
  431. PERCENT, // seen a percent at the beginning of the line
  432. IGNORE, // ignoring a field
  433. IGNORE_BOL, // at the beginning of a line ignoring a field
  434. KEY, // in the middle of a key
  435. DISCARD, // after truncate_len bytes of a key
  436. MIDDLE // in between keys
  437. } state = START;
  438. // In states START, BOL, IGNORE_BOL, space_count how many spaces at
  439. // the beginning have been seen. In states PERCENT, IGNORE, KEY,
  440. // MIDDLE space_count must be 0.
  441. int space_count = 0;
  442. int byte_count = 0; // bytes read
  443. int key_len = 0;
  444. int ref_start = -1; // position of start of current reference
  445. for (;;) {
  446. int c = getc(fp);
  447. if (c == EOF)
  448. break;
  449. // We opened the file in binary mode, so we need to skip
  450. // every CR character before a Newline.
  451. if (c == '\r') {
  452. int peek = getc(fp);
  453. if (peek == '\n') {
  454. byte_count++;
  455. c = peek;
  456. }
  457. else
  458. ungetc(peek, fp);
  459. }
  460. #if defined(__MSDOS__) || defined(_MSC_VER) || defined(__EMX__)
  461. else if (c == 0x1a) // ^Z means EOF in text files
  462. break;
  463. #endif
  464. byte_count++;
  465. switch (state) {
  466. case START:
  467. if (c == ' ' || c == '\t') {
  468. space_count++;
  469. break;
  470. }
  471. if (c == '\n') {
  472. space_count = 0;
  473. break;
  474. }
  475. ref_start = byte_count - space_count - 1;
  476. space_count = 0;
  477. if (c == '%')
  478. state = PERCENT;
  479. else if (csalnum(c)) {
  480. state = KEY;
  481. key_buffer[0] = c;
  482. key_len = 1;
  483. }
  484. else
  485. state = MIDDLE;
  486. break;
  487. case BOL:
  488. switch (c) {
  489. case '%':
  490. if (space_count > 0) {
  491. space_count = 0;
  492. state = MIDDLE;
  493. }
  494. else
  495. state = PERCENT;
  496. break;
  497. case ' ':
  498. case '\t':
  499. space_count++;
  500. break;
  501. case '\n':
  502. store_reference(filename_index, ref_start,
  503. byte_count - 1 - space_count - ref_start);
  504. state = START;
  505. space_count = 0;
  506. break;
  507. default:
  508. space_count = 0;
  509. if (csalnum(c)) {
  510. state = KEY;
  511. key_buffer[0] = c;
  512. key_len = 1;
  513. }
  514. else
  515. state = MIDDLE;
  516. }
  517. break;
  518. case PERCENT:
  519. if (strchr(ignore_fields, c) != 0)
  520. state = IGNORE;
  521. else if (c == '\n')
  522. state = BOL;
  523. else
  524. state = MIDDLE;
  525. break;
  526. case IGNORE:
  527. if (c == '\n')
  528. state = IGNORE_BOL;
  529. break;
  530. case IGNORE_BOL:
  531. switch (c) {
  532. case '%':
  533. if (space_count > 0) {
  534. state = IGNORE;
  535. space_count = 0;
  536. }
  537. else
  538. state = PERCENT;
  539. break;
  540. case ' ':
  541. case '\t':
  542. space_count++;
  543. break;
  544. case '\n':
  545. store_reference(filename_index, ref_start,
  546. byte_count - 1 - space_count - ref_start);
  547. state = START;
  548. space_count = 0;
  549. break;
  550. default:
  551. space_count = 0;
  552. state = IGNORE;
  553. }
  554. break;
  555. case KEY:
  556. if (csalnum(c)) {
  557. if (key_len < truncate_len)
  558. key_buffer[key_len++] = c;
  559. else
  560. state = DISCARD;
  561. }
  562. else {
  563. possibly_store_key(key_buffer, key_len);
  564. key_len = 0;
  565. if (c == '\n')
  566. state = BOL;
  567. else
  568. state = MIDDLE;
  569. }
  570. break;
  571. case DISCARD:
  572. if (!csalnum(c)) {
  573. possibly_store_key(key_buffer, key_len);
  574. key_len = 0;
  575. if (c == '\n')
  576. state = BOL;
  577. else
  578. state = MIDDLE;
  579. }
  580. break;
  581. case MIDDLE:
  582. if (csalnum(c)) {
  583. state = KEY;
  584. key_buffer[0] = c;
  585. key_len = 1;
  586. }
  587. else if (c == '\n')
  588. state = BOL;
  589. break;
  590. default:
  591. assert(0);
  592. }
  593. }
  594. switch (state) {
  595. case START:
  596. break;
  597. case DISCARD:
  598. case KEY:
  599. possibly_store_key(key_buffer, key_len);
  600. // fall through
  601. case BOL:
  602. case PERCENT:
  603. case IGNORE_BOL:
  604. case IGNORE:
  605. case MIDDLE:
  606. store_reference(filename_index, ref_start,
  607. byte_count - ref_start - space_count);
  608. break;
  609. default:
  610. assert(0);
  611. }
  612. fclose(fp);
  613. return 1;
  614. }
  615. static void store_reference(int filename_index, int pos, int len)
  616. {
  617. tag t;
  618. t.filename_index = filename_index;
  619. t.start = pos;
  620. t.length = len;
  621. fwrite_or_die(&t, sizeof(t), 1, indxfp);
  622. ntags++;
  623. }
  624. static void store_filename(const char *fn)
  625. {
  626. filenames += fn;
  627. filenames += '\0';
  628. }
  629. static void init_hash_table()
  630. {
  631. hash_table = new table_entry[hash_table_size];
  632. for (int i = 0; i < hash_table_size; i++)
  633. hash_table[i].ptr = 0;
  634. }
  635. static void possibly_store_key(char *s, int len)
  636. {
  637. static int last_tagno = -1;
  638. static int key_count;
  639. if (last_tagno != ntags) {
  640. last_tagno = ntags;
  641. key_count = 0;
  642. }
  643. if (key_count < max_keys_per_item) {
  644. if (store_key(s, len))
  645. key_count++;
  646. }
  647. }
  648. static int store_key(char *s, int len)
  649. {
  650. if (len < shortest_len)
  651. return 0;
  652. int is_number = 1;
  653. for (int i = 0; i < len; i++)
  654. if (!csdigit(s[i])) {
  655. is_number = 0;
  656. s[i] = cmlower(s[i]);
  657. }
  658. if (is_number && !(len == 4 && s[0] == '1' && s[1] == '9'))
  659. return 0;
  660. int h = hash(s, len) % hash_table_size;
  661. if (common_words_table) {
  662. for (word_list *ptr = common_words_table[h]; ptr; ptr = ptr->next)
  663. if (len == ptr->len && memcmp(s, ptr->str, len) == 0)
  664. return 0;
  665. }
  666. table_entry *pp = hash_table + h;
  667. if (!pp->ptr)
  668. pp->ptr = new block;
  669. else if (pp->ptr->v[pp->ptr->used - 1] == ntags)
  670. return 1;
  671. else if (pp->ptr->used >= BLOCK_SIZE)
  672. pp->ptr = new block(pp->ptr);
  673. pp->ptr->v[(pp->ptr->used)++] = ntags;
  674. return 1;
  675. }
  676. static void write_hash_table()
  677. {
  678. const int minus_one = -1;
  679. int li = 0;
  680. for (int i = 0; i < hash_table_size; i++) {
  681. block *ptr = hash_table[i].ptr;
  682. if (!ptr)
  683. hash_table[i].count = -1;
  684. else {
  685. hash_table[i].count = li;
  686. block *rev = 0;
  687. while (ptr) {
  688. block *tem = ptr;
  689. ptr = ptr->next;
  690. tem->next = rev;
  691. rev = tem;
  692. }
  693. while (rev) {
  694. fwrite_or_die(rev->v, sizeof(int), rev->used, indxfp);
  695. li += rev->used;
  696. block *tem = rev;
  697. rev = rev->next;
  698. delete tem;
  699. }
  700. fwrite_or_die(&minus_one, sizeof(int), 1, indxfp);
  701. li += 1;
  702. }
  703. }
  704. if (sizeof(table_entry) == sizeof(int))
  705. fwrite_or_die(hash_table, sizeof(int), hash_table_size, indxfp);
  706. else {
  707. // write it out word by word
  708. for (int i = 0; i < hash_table_size; i++)
  709. fwrite_or_die(&hash_table[i].count, sizeof(int), 1, indxfp);
  710. }
  711. fwrite_or_die(filenames.contents(), 1, filenames.length(), indxfp);
  712. if (fseek(indxfp, 0, 0) < 0)
  713. fatal("error seeking on index file: %1", strerror(errno));
  714. index_header h;
  715. h.magic = INDEX_MAGIC;
  716. h.version = INDEX_VERSION;
  717. h.tags_size = ntags;
  718. h.lists_size = li;
  719. h.table_size = hash_table_size;
  720. h.strings_size = filenames.length();
  721. h.truncate = truncate_len;
  722. h.shortest = shortest_len;
  723. h.common = n_ignore_words;
  724. fwrite_or_die(&h, sizeof(h), 1, indxfp);
  725. }
  726. static void fwrite_or_die(const void *ptr, int size, int nitems, FILE *fp)
  727. {
  728. if (fwrite(ptr, size, nitems, fp) != (size_t)nitems)
  729. fatal("fwrite failed: %1", strerror(errno));
  730. }
  731. void fatal_error_exit()
  732. {
  733. cleanup();
  734. exit(3);
  735. }
  736. extern "C" {
  737. void cleanup()
  738. {
  739. if (temp_index_file)
  740. unlink(temp_index_file);
  741. }
  742. }