PageRenderTime 33ms CodeModel.GetById 24ms RepoModel.GetById 1ms app.codeStats 0ms

/usr/src/lib/libast/common/misc/magic.c

https://bitbucket.org/a3217055/illumos-joyent
C | 2347 lines | 2221 code | 67 blank | 59 comment | 338 complexity | 9b0aff5ac94817988fa319976cfcca68 MD5 | raw file
Possible License(s): AGPL-3.0, BSD-3-Clause, GPL-2.0, GPL-3.0, 0BSD, BSD-2-Clause, BSD-3-Clause-No-Nuclear-License-2014, MPL-2.0-no-copyleft-exception, AGPL-1.0, LGPL-2.1, LGPL-2.0
  1. /***********************************************************************
  2. * *
  3. * This software is part of the ast package *
  4. * Copyright (c) 1985-2010 AT&T Intellectual Property *
  5. * and is licensed under the *
  6. * Common Public License, Version 1.0 *
  7. * by AT&T Intellectual Property *
  8. * *
  9. * A copy of the License is available at *
  10. * http://www.opensource.org/licenses/cpl1.0.txt *
  11. * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
  12. * *
  13. * Information and Software Systems Research *
  14. * AT&T Research *
  15. * Florham Park NJ *
  16. * *
  17. * Glenn Fowler <gsf@research.att.com> *
  18. * David Korn <dgk@research.att.com> *
  19. * Phong Vo <kpv@research.att.com> *
  20. * *
  21. ***********************************************************************/
  22. #pragma prototyped
  23. /*
  24. * Glenn Fowler
  25. * AT&T Research
  26. *
  27. * library interface to file
  28. *
  29. * the sum of the hacks {s5,v10,planix} is _____ than the parts
  30. */
  /* id: version identification string embedded in the object ("@(#)" / "$Id:" markers) */
  31. static const char id[] = "\n@(#)$Id: magic library (AT&T Research) 2008-09-10 $\0\n";
  /* lib: catalog/library name handed to ERROR_translate() by the T() macro */
  32. static const char lib[] = "libast:magic";
  33. #include <ast.h>
  34. #include <ctype.h>
  35. #include <ccode.h>
  36. #include <dt.h>
  37. #include <modex.h>
  38. #include <error.h>
  39. #include <regex.h>
  40. #include <swap.h>
  /* T(m): translate message m through ERROR_translate() using lib; an empty string is returned as is */
  41. #define T(m) (*m?ERROR_translate(NiL,NiL,lib,m):m)
  /* match(s,p): strgrpmatch() of s against pattern p with STR_LEFT|STR_RIGHT|STR_ICASE and no sub-match capture */
  42. #define match(s,p) strgrpmatch(s,p,NiL,0,STR_LEFT|STR_RIGHT|STR_ICASE)
  43. #define MAXNEST 10 /* { ... } nesting limit; also sizes the keep/cap/msg/ret stacks */
  44. #define MINITEM 4 /* magic buffer rounding */
  /*
   * keyword => small integer mapping stored in a Dt_t dictionary
   * the dict[] and info[] tables initialize name and value positionally,
   * so the order of the first two members must not change
   */
  45. typedef struct /* identifier dictionary entry */
  46. {
  47. const char name[16]; /* identifier name (fixed 16 byte array) */
  48. int value; /* identifier value (an ID_* or INFO_* constant) */
  49. Dtlink_t link; /* dictionary link */
  50. } Info_t;
  /* singly linked list node holding one compiled regex_t pattern */
  51. typedef struct Edit /* edit substitution */
  52. {
  53. struct Edit* next; /* next in list */
  54. regex_t* from; /* from pattern */
  55. } Edit_t;
  /* forward declaration so Loop_t can point at an Entry before Entry_t is defined */
  56. struct Entry;
  /*
   * state for the 'l' (loop) operation in ckmagic():
   * count and offset are written at run time, lab/start/size are only read there
   */
  57. typedef struct /* loop info */
  58. {
  59. struct Entry* lab; /* call this function */
  60. int start; /* start here (becomes the data offset on the first iteration) */
  61. int size; /* increment by this amount (added to the data offset per iteration) */
  62. int count; /* dynamic loop count (0 means the loop is not active) */
  63. int offset; /* dynamic offset (caller offset saved while looping) */
  64. } Loop_t;
  /*
   * one parsed magic table entry; ckmagic() walks these through next
   * which value member is live depends on type/op/cont:
   *	num  numeric compare, str  's' 'm' 'M' 'S' string compare,
   *	lab  '$' call target, sub  'e' 'E' regex substitution, loop  'l' operation
   */
  65. typedef struct Entry /* magic file entry */
  66. {
  67. struct Entry* next; /* next in list */
  68. char* expr; /* offset expression (0 => use offset directly) */
  69. union
  70. {
  71. unsigned long num;
  72. char* str;
  73. struct Entry* lab;
  74. regex_t* sub;
  75. Loop_t* loop;
  76. } value; /* comparison value */
  77. char* desc; /* file description (format string for the result text) */
  78. char* mime; /* file mime type */
  79. unsigned long offset; /* offset in bytes, or an INFO_* code when expr is set */
  80. unsigned long mask; /* mask before compare; byte length for string types */
  81. char cont; /* continuation operation: ckmagic() handles '#' '$' ':' '|' '&' */
  82. char type; /* datum type: 'b' 'h' 'd' 'l' 'v' 'q' 'e' 'E' 's' 'S' 'm' 'M' 'D' 'N' ' ' */
  83. char op; /* comparison operation: '=' '@' '!' '^' '>' '<' 'l' 'm' 'r' 'v' */
  84. char nest; /* { or } nesting operation */
  85. char swap; /* forced swap order */
  86. } Entry_t;
  /*
   * character classification: each code set gets CC_BIT bits in a Cctype_t
   * (cklang() shifts the accumulated flags right by CC_BIT per code set),
   * so Cctype_t must hold CC_MAPS*CC_BIT bits
   */
  87. #define CC_BIT 5
  88. #if (CC_MAPS*CC_BIT) <= (CHAR_BIT*2)
  89. typedef unsigned short Cctype_t;
  90. #else
  91. typedef unsigned long Cctype_t;
  92. #endif
  93. #define CC_text 0x01
  94. #define CC_control 0x02
  95. #define CC_latin 0x04
  96. #define CC_binary 0x08
  97. #define CC_utf_8 0x10
  98. #define CC_notext CC_text /* CC_text is flipped before checking */
  99. #define CC_MASK (CC_binary|CC_latin|CC_control|CC_text)
  /*
   * CCTYPE(c): > 0240 => CC_binary, 0200..0240 => CC_latin,
   * < 040 other than BEL TAB NL VT CR => CC_control, anything else => CC_text
   */
  100. #define CCTYPE(c) (((c)>0240)?CC_binary:((c)>=0200)?CC_latin:((c)<040&&(c)!=007&&(c)!=011&&(c)!=012&&(c)!=013&&(c)!=015)?CC_control:CC_text)
  /* ID_*: language hint codes; they index the identifier[ID_MAX + 1] counters */
  101. #define ID_NONE 0
  102. #define ID_ASM 1
  103. #define ID_C 2
  104. #define ID_COBOL 3
  105. #define ID_COPYBOOK 4
  106. #define ID_CPLUSPLUS 5
  107. #define ID_FORTRAN 6
  108. #define ID_HTML 7
  109. #define ID_INCL1 8
  110. #define ID_INCL2 9
  111. #define ID_INCL3 10
  112. #define ID_MAM1 11
  113. #define ID_MAM2 12
  114. #define ID_MAM3 13
  115. #define ID_NOTEXT 14
  116. #define ID_PL1 15
  117. #define ID_YACC 16
  118. #define ID_MAX ID_YACC
  /* INFO_*: stat(2) derived values; ckmagic() switches on these in Entry_t.offset when expr is set */
  119. #define INFO_atime 1
  120. #define INFO_blocks 2
  121. #define INFO_ctime 3
  122. #define INFO_fstype 4
  123. #define INFO_gid 5
  124. #define INFO_mode 6
  125. #define INFO_mtime 7
  126. #define INFO_name 8
  127. #define INFO_nlink 9
  128. #define INFO_size 10
  129. #define INFO_uid 11
  /*
   * implementation private members, defined before <magic.h> is included
   * NOTE(review): presumably <magic.h> expands this inside Magic_t -- the
   * functions in this file reach every member through a Magic_t* -- confirm
   */
  130. #define _MAGIC_PRIVATE_ \
  131. Magicdisc_t* disc; /* discipline */ \
  132. Vmalloc_t* vm; /* vmalloc region */ \
  133. Entry_t* magic; /* parsed magic table */ \
  134. Entry_t* magiclast; /* last entry in magic */ \
  135. char* mime; /* MIME type */ \
  136. unsigned char* x2n; /* CC_ALIEN=>CC_NATIVE */ \
  137. char fbuf[SF_BUFSIZE + 1]; /* file data */ \
  138. char xbuf[SF_BUFSIZE + 1]; /* indirect file data */ \
  139. char nbuf[256]; /* !CC_NATIVE data */ \
  140. char mbuf[64]; /* mime string */ \
  141. char sbuf[64]; /* type suffix string */ \
  142. char tbuf[2 * PATH_MAX]; /* type string */ \
  143. Cctype_t cctype[UCHAR_MAX + 1]; /* char code types */ \
  144. unsigned int count[UCHAR_MAX + 1]; /* char frequency count */ \
  145. unsigned int multi[UCHAR_MAX + 1]; /* multi char count */ \
  146. int keep[MAXNEST]; /* ckmagic nest stack */ \
  147. char* cap[MAXNEST]; /* ckmagic mime stack */ \
  148. char* msg[MAXNEST]; /* ckmagic text stack */ \
  149. Entry_t* ret[MAXNEST]; /* ckmagic return stack */ \
  150. int fbsz; /* fbuf size */ \
  151. int fbmx; /* fbuf max size */ \
  152. int xbsz; /* xbuf size */ \
  153. int swap; /* swap() operation */ \
  154. unsigned long flags; /* disc+open flags */ \
  155. long xoff; /* xbuf offset */ \
  156. int identifier[ID_MAX + 1]; /* Info_t identifier */ \
  157. Sfio_t* fp; /* fbuf fp */ \
  158. Sfio_t* tmp; /* tmp string */ \
  159. regdisc_t redisc; /* regex discipline */ \
  160. Dtdisc_t dtdisc; /* dict discipline */ \
  161. Dt_t* idtab; /* identifier dict */ \
  162. Dt_t* infotab; /* info keyword dict */
  163. #include <magic.h>
  /*
   * identifier => language hint table
   * cklang() inserts every entry into the idtab dictionary on first use and,
   * for each identifier found in the file data that matches a name here,
   * increments identifier[value]
   * names must fit in Info_t.name[16]
   */
  164. static Info_t dict[] = /* keyword dictionary */
  165. {
  166. { "COMMON", ID_FORTRAN },
  167. { "COMPUTE", ID_COBOL },
  168. { "COMP", ID_COPYBOOK },
  169. { "COMPUTATIONAL",ID_COPYBOOK },
  170. { "DCL", ID_PL1 },
  171. { "DEFINED", ID_PL1 },
  172. { "DIMENSION", ID_FORTRAN },
  173. { "DIVISION", ID_COBOL },
  174. { "FILLER", ID_COPYBOOK },
  175. { "FIXED", ID_PL1 },
  176. { "FUNCTION", ID_FORTRAN },
  177. { "HTML", ID_HTML },
  178. { "INTEGER", ID_FORTRAN },
  179. { "MAIN", ID_PL1 },
  180. { "OPTIONS", ID_PL1 },
  181. { "PERFORM", ID_COBOL },
  182. { "PIC", ID_COPYBOOK },
  183. { "REAL", ID_FORTRAN },
  184. { "REDEFINES", ID_COPYBOOK },
  185. { "S9", ID_COPYBOOK },
  186. { "SECTION", ID_COBOL },
  187. { "SELECT", ID_COBOL },
  188. { "SUBROUTINE", ID_FORTRAN },
  189. { "TEXT", ID_ASM },
  190. { "VALUE", ID_COPYBOOK },
  191. { "attr", ID_MAM3 },
  192. { "binary", ID_YACC },
  193. { "block", ID_FORTRAN },
  194. { "bss", ID_ASM },
  195. { "byte", ID_ASM },
  196. { "char", ID_C },
  197. { "class", ID_CPLUSPLUS },
  198. { "clr", ID_NOTEXT },
  199. { "comm", ID_ASM },
  200. { "common", ID_FORTRAN },
  201. { "data", ID_ASM },
  202. { "dimension", ID_FORTRAN },
  203. { "done", ID_MAM2 },
  204. { "double", ID_C },
  205. { "even", ID_ASM },
  206. { "exec", ID_MAM3 },
  207. { "extern", ID_C },
  208. { "float", ID_C },
  209. { "function", ID_FORTRAN },
  210. { "globl", ID_ASM },
  211. { "h", ID_INCL3 },
  212. { "html", ID_HTML },
  213. { "include", ID_INCL1 },
  214. { "int", ID_C },
  215. { "integer", ID_FORTRAN },
  216. { "jmp", ID_NOTEXT },
  217. { "left", ID_YACC },
  218. { "libc", ID_INCL2 },
  219. { "long", ID_C },
  220. { "make", ID_MAM1 },
  221. { "mov", ID_NOTEXT },
  222. { "private", ID_CPLUSPLUS },
  223. { "public", ID_CPLUSPLUS },
  224. { "real", ID_FORTRAN },
  225. { "register", ID_C },
  226. { "right", ID_YACC },
  227. { "sfio", ID_INCL2 },
  228. { "static", ID_C },
  229. { "stdio", ID_INCL2 },
  230. { "struct", ID_C },
  231. { "subroutine", ID_FORTRAN },
  232. { "sys", ID_NOTEXT },
  233. { "term", ID_YACC },
  234. { "text", ID_ASM },
  235. { "tst", ID_NOTEXT },
  236. { "type", ID_YACC },
  237. { "typedef", ID_C },
  238. { "u", ID_INCL2 },
  239. { "union", ID_YACC },
  240. { "void", ID_C },
  241. };
  /*
   * stat info keyword => INFO_* code table
   * ckmagic() acts on these codes when an entry carries one in its offset
   * NOTE(review): the code that looks names up in this table (presumably via
   * the infotab dictionary) is not visible in this part of the file -- confirm
   */
  242. static Info_t info[] =
  243. {
  244. { "atime", INFO_atime },
  245. { "blocks", INFO_blocks },
  246. { "ctime", INFO_ctime },
  247. { "fstype", INFO_fstype },
  248. { "gid", INFO_gid },
  249. { "mode", INFO_mode },
  250. { "mtime", INFO_mtime },
  251. { "name", INFO_name },
  252. { "nlink", INFO_nlink },
  253. { "size", INFO_size },
  254. { "uid", INFO_uid },
  255. };
  256. /*
  257. * return pointer to data at offset off and size siz
  258. */
  259. static char*
  260. getdata(register Magic_t* mp, register long off, register int siz)
  261. {
  262. register long n;
  263. if (off < 0)
  264. return 0;
  265. if (off + siz <= mp->fbsz)
  266. return mp->fbuf + off;
  267. if (off < mp->xoff || off + siz > mp->xoff + mp->xbsz)
  268. {
  269. if (off + siz > mp->fbmx)
  270. return 0;
  271. n = (off / (SF_BUFSIZE / 2)) * (SF_BUFSIZE / 2);
  272. if (sfseek(mp->fp, n, SEEK_SET) != n)
  273. return 0;
  274. if ((mp->xbsz = sfread(mp->fp, mp->xbuf, sizeof(mp->xbuf) - 1)) < 0)
  275. {
  276. mp->xoff = 0;
  277. mp->xbsz = 0;
  278. return 0;
  279. }
  280. mp->xbuf[mp->xbsz] = 0;
  281. mp->xoff = n;
  282. if (off + siz > mp->xoff + mp->xbsz)
  283. return 0;
  284. }
  285. return mp->xbuf + off - mp->xoff;
  286. }
  287. /*
  288. * @... evaluator for strexpr()
  289. */
  290. static long
  291. indirect(const char* cs, char** e, void* handle)
  292. {
  293. register char* s = (char*)cs;
  294. register Magic_t* mp = (Magic_t*)handle;
  295. register long n = 0;
  296. register char* p;
  297. if (s)
  298. {
  299. if (*s == '@')
  300. {
  301. n = *++s == '(' ? strexpr(s, e, indirect, mp) : strtol(s, e, 0);
  302. switch (*(s = *e))
  303. {
  304. case 'b':
  305. case 'B':
  306. s++;
  307. if (p = getdata(mp, n, 1))
  308. n = *(unsigned char*)p;
  309. else
  310. s = (char*)cs;
  311. break;
  312. case 'h':
  313. case 'H':
  314. s++;
  315. if (p = getdata(mp, n, 2))
  316. n = swapget(mp->swap, p, 2);
  317. else
  318. s = (char*)cs;
  319. break;
  320. case 'q':
  321. case 'Q':
  322. s++;
  323. if (p = getdata(mp, n, 8))
  324. n = swapget(mp->swap, p, 8);
  325. else
  326. s = (char*)cs;
  327. break;
  328. default:
  329. if (isalnum(*s))
  330. s++;
  331. if (p = getdata(mp, n, 4))
  332. n = swapget(mp->swap, p, 4);
  333. else
  334. s = (char*)cs;
  335. break;
  336. }
  337. }
  338. *e = s;
  339. }
  340. else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
  341. (*mp->disc->errorf)(mp, mp->disc, 2, "%s in indirect expression", *e);
  342. return n;
  343. }
  344. /*
  345. * emit regex error message
  346. */
  347. static void
  348. regmessage(Magic_t* mp, regex_t* re, int code)
  349. {
  350. char buf[128];
  351. if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
  352. {
  353. regerror(code, re, buf, sizeof(buf));
  354. (*mp->disc->errorf)(mp, mp->disc, 3, "regex: %s", buf);
  355. }
  356. }
  357. /*
  358. * decompose vcodex(3) method composition
  359. */
  360. static char*
  361. vcdecomp(char* b, char* e, unsigned char* m, unsigned char* x)
  362. {
  363. unsigned char* map;
  364. const char* o;
  365. int c;
  366. int n;
  367. int i;
  368. int a;
  369. map = CCMAP(CC_ASCII, CC_NATIVE);
  370. a = 0;
  371. i = 1;
  372. for (;;)
  373. {
  374. if (i)
  375. i = 0;
  376. else
  377. *b++ = '^';
  378. if (m < (x - 1) && !*(m + 1))
  379. {
  380. /*
  381. * obsolete indices
  382. */
  383. if (!a)
  384. {
  385. a = 1;
  386. o = "old, ";
  387. while (b < e && (c = *o++))
  388. *b++ = c;
  389. }
  390. switch (*m)
  391. {
  392. case 0: o = "delta"; break;
  393. case 1: o = "huffman"; break;
  394. case 2: o = "huffgroup"; break;
  395. case 3: o = "arith"; break;
  396. case 4: o = "bwt"; break;
  397. case 5: o = "rle"; break;
  398. case 6: o = "mtf"; break;
  399. case 7: o = "transpose"; break;
  400. case 8: o = "table"; break;
  401. case 9: o = "huffpart"; break;
  402. case 50: o = "map"; break;
  403. case 100: o = "recfm"; break;
  404. case 101: o = "ss7"; break;
  405. default: o = "UNKNOWN"; break;
  406. }
  407. m += 2;
  408. while (b < e && (c = *o++))
  409. *b++ = c;
  410. }
  411. else
  412. while (b < e && m < x && (c = *m++))
  413. {
  414. if (map)
  415. c = map[c];
  416. *b++ = c;
  417. }
  418. if (b >= e)
  419. break;
  420. n = 0;
  421. while (m < x)
  422. {
  423. n = (n<<7) | (*m & 0x7f);
  424. if (!(*m++ & 0x80))
  425. break;
  426. }
  427. if (n >= (x - m))
  428. break;
  429. m += n;
  430. }
  431. return b;
  432. }
  433. /*
  434. * check for magic table match in buf
  435. */
  436. static char*
  437. ckmagic(register Magic_t* mp, const char* file, char* buf, struct stat* st, unsigned long off)
  438. {
  439. register Entry_t* ep;
  440. register char* p;
  441. register char* b;
  442. register int level = 0;
  443. int call = -1;
  444. int c;
  445. char* q;
  446. char* t;
  447. char* base = 0;
  448. unsigned long num;
  449. unsigned long mask;
  450. regmatch_t matches[10];
  451. mp->swap = 0;
  452. b = mp->msg[0] = buf;
  453. mp->mime = mp->cap[0] = 0;
  454. mp->keep[0] = 0;
  455. for (ep = mp->magic; ep; ep = ep->next)
  456. {
  457. fun:
  458. if (ep->nest == '{')
  459. {
  460. if (++level >= MAXNEST)
  461. {
  462. call = -1;
  463. level = 0;
  464. mp->keep[0] = 0;
  465. b = mp->msg[0];
  466. mp->mime = mp->cap[0];
  467. continue;
  468. }
  469. mp->keep[level] = mp->keep[level - 1] != 0;
  470. mp->msg[level] = b;
  471. mp->cap[level] = mp->mime;
  472. }
  473. switch (ep->cont)
  474. {
  475. case '#':
  476. if (mp->keep[level] && b > buf)
  477. {
  478. *b = 0;
  479. return buf;
  480. }
  481. mp->swap = 0;
  482. b = mp->msg[0] = buf;
  483. mp->mime = mp->cap[0] = 0;
  484. if (ep->type == ' ')
  485. continue;
  486. break;
  487. case '$':
  488. if (mp->keep[level] && call < (MAXNEST - 1))
  489. {
  490. mp->ret[++call] = ep;
  491. ep = ep->value.lab;
  492. goto fun;
  493. }
  494. continue;
  495. case ':':
  496. ep = mp->ret[call--];
  497. if (ep->op == 'l')
  498. goto fun;
  499. continue;
  500. case '|':
  501. if (mp->keep[level] > 1)
  502. goto checknest;
  503. /*FALLTHROUGH*/
  504. default:
  505. if (!mp->keep[level])
  506. {
  507. b = mp->msg[level];
  508. mp->mime = mp->cap[level];
  509. goto checknest;
  510. }
  511. break;
  512. }
  513. p = "";
  514. num = 0;
  515. if (!ep->expr)
  516. num = ep->offset + off;
  517. else
  518. switch (ep->offset)
  519. {
  520. case 0:
  521. num = strexpr(ep->expr, NiL, indirect, mp) + off;
  522. break;
  523. case INFO_atime:
  524. num = st->st_atime;
  525. ep->type = 'D';
  526. break;
  527. case INFO_blocks:
  528. num = iblocks(st);
  529. ep->type = 'N';
  530. break;
  531. case INFO_ctime:
  532. num = st->st_ctime;
  533. ep->type = 'D';
  534. break;
  535. case INFO_fstype:
  536. p = fmtfs(st);
  537. ep->type = toupper(ep->type);
  538. break;
  539. case INFO_gid:
  540. if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
  541. {
  542. p = fmtgid(st->st_gid);
  543. ep->type = toupper(ep->type);
  544. }
  545. else
  546. {
  547. num = st->st_gid;
  548. ep->type = 'N';
  549. }
  550. break;
  551. case INFO_mode:
  552. if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
  553. {
  554. p = fmtmode(st->st_mode, 0);
  555. ep->type = toupper(ep->type);
  556. }
  557. else
  558. {
  559. num = modex(st->st_mode);
  560. ep->type = 'N';
  561. }
  562. break;
  563. case INFO_mtime:
  564. num = st->st_ctime;
  565. ep->type = 'D';
  566. break;
  567. case INFO_name:
  568. if (!base)
  569. {
  570. if (base = strrchr(file, '/'))
  571. base++;
  572. else
  573. base = (char*)file;
  574. }
  575. p = base;
  576. ep->type = toupper(ep->type);
  577. break;
  578. case INFO_nlink:
  579. num = st->st_nlink;
  580. ep->type = 'N';
  581. break;
  582. case INFO_size:
  583. num = st->st_size;
  584. ep->type = 'N';
  585. break;
  586. case INFO_uid:
  587. if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
  588. {
  589. p = fmtuid(st->st_uid);
  590. ep->type = toupper(ep->type);
  591. }
  592. else
  593. {
  594. num = st->st_uid;
  595. ep->type = 'N';
  596. }
  597. break;
  598. }
  599. switch (ep->type)
  600. {
  601. case 'b':
  602. if (!(p = getdata(mp, num, 1)))
  603. goto next;
  604. num = *(unsigned char*)p;
  605. break;
  606. case 'h':
  607. if (!(p = getdata(mp, num, 2)))
  608. goto next;
  609. num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 2);
  610. break;
  611. case 'd':
  612. case 'l':
  613. case 'v':
  614. if (!(p = getdata(mp, num, 4)))
  615. goto next;
  616. num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 4);
  617. break;
  618. case 'q':
  619. if (!(p = getdata(mp, num, 8)))
  620. goto next;
  621. num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 8);
  622. break;
  623. case 'e':
  624. if (!(p = getdata(mp, num, 0)))
  625. goto next;
  626. /*FALLTHROUGH*/
  627. case 'E':
  628. if (!ep->value.sub)
  629. goto next;
  630. if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches)))
  631. {
  632. c = mp->fbsz;
  633. if (c >= sizeof(mp->nbuf))
  634. c = sizeof(mp->nbuf) - 1;
  635. p = (char*)memcpy(mp->nbuf, p, c);
  636. p[c] = 0;
  637. ccmapstr(mp->x2n, p, c);
  638. if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches)))
  639. {
  640. if (c != REG_NOMATCH)
  641. regmessage(mp, ep->value.sub, c);
  642. goto next;
  643. }
  644. }
  645. p = ep->value.sub->re_sub->re_buf;
  646. q = T(ep->desc);
  647. t = *q ? q : p;
  648. if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *t && *t != ',' && *t != '.' && *t != '\b')
  649. *b++ = ' ';
  650. b += sfsprintf(b, PATH_MAX - (b - buf), *q ? q : "%s", p + (*p == '\b'));
  651. if (ep->mime)
  652. mp->mime = ep->mime;
  653. goto checknest;
  654. case 's':
  655. if (!(p = getdata(mp, num, ep->mask)))
  656. goto next;
  657. goto checkstr;
  658. case 'm':
  659. if (!(p = getdata(mp, num, 0)))
  660. goto next;
  661. /*FALLTHROUGH*/
  662. case 'M':
  663. case 'S':
  664. checkstr:
  665. for (;;)
  666. {
  667. if (*ep->value.str == '*' && !*(ep->value.str + 1) && isprint(*p))
  668. break;
  669. if ((ep->type == 'm' || ep->type == 'M') ? strmatch(p, ep->value.str) : !memcmp(p, ep->value.str, ep->mask))
  670. break;
  671. if (p == mp->nbuf || ep->mask >= sizeof(mp->nbuf))
  672. goto next;
  673. p = (char*)memcpy(mp->nbuf, p, ep->mask);
  674. p[ep->mask] = 0;
  675. ccmapstr(mp->x2n, p, ep->mask);
  676. }
  677. q = T(ep->desc);
  678. if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b')
  679. *b++ = ' ';
  680. for (t = p; (c = *t) >= 0 && c <= 0177 && isprint(c) && c != '\n'; t++);
  681. *t = 0;
  682. b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), p);
  683. *t = c;
  684. if (ep->mime)
  685. mp->mime = ep->mime;
  686. goto checknest;
  687. }
  688. if (mask = ep->mask)
  689. num &= mask;
  690. switch (ep->op)
  691. {
  692. case '=':
  693. case '@':
  694. if (num == ep->value.num)
  695. break;
  696. if (ep->cont != '#')
  697. goto next;
  698. if (!mask)
  699. mask = ~mask;
  700. if (ep->type == 'h')
  701. {
  702. if ((num = swapget(mp->swap = 1, p, 2) & mask) == ep->value.num)
  703. {
  704. if (!(mp->swap & (mp->swap + 1)))
  705. mp->swap = 7;
  706. goto swapped;
  707. }
  708. }
  709. else if (ep->type == 'l')
  710. {
  711. for (c = 1; c < 4; c++)
  712. if ((num = swapget(mp->swap = c, p, 4) & mask) == ep->value.num)
  713. {
  714. if (!(mp->swap & (mp->swap + 1)))
  715. mp->swap = 7;
  716. goto swapped;
  717. }
  718. }
  719. else if (ep->type == 'q')
  720. {
  721. for (c = 1; c < 8; c++)
  722. if ((num = swapget(mp->swap = c, p, 8) & mask) == ep->value.num)
  723. goto swapped;
  724. }
  725. goto next;
  726. case '!':
  727. if (num != ep->value.num)
  728. break;
  729. goto next;
  730. case '^':
  731. if (num ^ ep->value.num)
  732. break;
  733. goto next;
  734. case '>':
  735. if (num > ep->value.num)
  736. break;
  737. goto next;
  738. case '<':
  739. if (num < ep->value.num)
  740. break;
  741. goto next;
  742. case 'l':
  743. if (num > 0 && mp->keep[level] && call < (MAXNEST - 1))
  744. {
  745. if (!ep->value.loop->count)
  746. {
  747. ep->value.loop->count = num;
  748. ep->value.loop->offset = off;
  749. off = ep->value.loop->start;
  750. }
  751. else if (!--ep->value.loop->count)
  752. {
  753. off = ep->value.loop->offset;
  754. goto next;
  755. }
  756. else
  757. off += ep->value.loop->size;
  758. mp->ret[++call] = ep;
  759. ep = ep->value.loop->lab;
  760. goto fun;
  761. }
  762. goto next;
  763. case 'm':
  764. c = mp->swap;
  765. t = ckmagic(mp, file, b + (b > buf), st, num);
  766. mp->swap = c;
  767. if (!t)
  768. goto next;
  769. if (b > buf)
  770. *b = ' ';
  771. b += strlen(b);
  772. break;
  773. case 'r':
  774. #if _UWIN
  775. {
  776. char* e;
  777. Sfio_t* rp;
  778. Sfio_t* gp;
  779. if (!(t = strrchr(file, '.')))
  780. goto next;
  781. sfprintf(mp->tmp, "/reg/classes_root/%s", t);
  782. if (!(t = sfstruse(mp->tmp)) || !(rp = sfopen(NiL, t, "r")))
  783. goto next;
  784. *ep->desc = 0;
  785. *ep->mime = 0;
  786. gp = 0;
  787. while (t = sfgetr(rp, '\n', 1))
  788. {
  789. if (strneq(t, "Content Type=", 13))
  790. {
  791. ep->mime = vmnewof(mp->vm, ep->mime, char, sfvalue(rp), 0);
  792. strcpy(ep->mime, t + 13);
  793. if (gp)
  794. break;
  795. }
  796. else
  797. {
  798. sfprintf(mp->tmp, "/reg/classes_root/%s", t);
  799. if ((e = sfstruse(mp->tmp)) && (gp = sfopen(NiL, e, "r")))
  800. {
  801. ep->desc = vmnewof(mp->vm, ep->desc, char, strlen(t), 1);
  802. strcpy(ep->desc, t);
  803. if (*ep->mime)
  804. break;
  805. }
  806. }
  807. }
  808. sfclose(rp);
  809. if (!gp)
  810. goto next;
  811. if (!*ep->mime)
  812. {
  813. t = T(ep->desc);
  814. if (!strncasecmp(t, "microsoft", 9))
  815. t += 9;
  816. while (isspace(*t))
  817. t++;
  818. e = "application/x-ms-";
  819. ep->mime = vmnewof(mp->vm, ep->mime, char, strlen(t), strlen(e));
  820. e = strcopy(ep->mime, e);
  821. while ((c = *t++) && c != '.' && c != ' ')
  822. *e++ = isupper(c) ? tolower(c) : c;
  823. *e = 0;
  824. }
  825. while (t = sfgetr(gp, '\n', 1))
  826. if (*t && !streq(t, "\"\""))
  827. {
  828. ep->desc = vmnewof(mp->vm, ep->desc, char, sfvalue(gp), 0);
  829. strcpy(ep->desc, t);
  830. break;
  831. }
  832. sfclose(gp);
  833. if (!*ep->desc)
  834. goto next;
  835. if (!t)
  836. for (t = T(ep->desc); *t; t++)
  837. if (*t == '.')
  838. *t = ' ';
  839. if (!mp->keep[level])
  840. mp->keep[level] = 2;
  841. mp->mime = ep->mime;
  842. break;
  843. }
  844. #else
  845. if (ep->cont == '#' && !mp->keep[level])
  846. mp->keep[level] = 1;
  847. goto next;
  848. #endif
  849. case 'v':
  850. if (!(p = getdata(mp, num, 4)))
  851. goto next;
  852. c = 0;
  853. do
  854. {
  855. num++;
  856. c = (c<<7) | (*p & 0x7f);
  857. } while (*p++ & 0x80);
  858. if (!(p = getdata(mp, num, c)))
  859. goto next;
  860. if (mp->keep[level]++ && b > buf && *(b - 1) != ' ')
  861. {
  862. *b++ = ',';
  863. *b++ = ' ';
  864. }
  865. b = vcdecomp(b, buf + PATH_MAX, (unsigned char*)p, (unsigned char*)p + c);
  866. goto checknest;
  867. }
  868. swapped:
  869. q = T(ep->desc);
  870. if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b')
  871. *b++ = ' ';
  872. if (ep->type == 'd' || ep->type == 'D')
  873. b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), fmttime("%?%l", (time_t)num));
  874. else if (ep->type == 'v')
  875. b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), fmtversion(num));
  876. else
  877. b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), num);
  878. if (ep->mime && *ep->mime)
  879. mp->mime = ep->mime;
  880. checknest:
  881. if (ep->nest == '}')
  882. {
  883. if (!mp->keep[level])
  884. {
  885. b = mp->msg[level];
  886. mp->mime = mp->cap[level];
  887. }
  888. else if (level > 0)
  889. mp->keep[level - 1] = mp->keep[level];
  890. if (--level < 0)
  891. {
  892. level = 0;
  893. mp->keep[0] = 0;
  894. }
  895. }
  896. continue;
  897. next:
  898. if (ep->cont == '&')
  899. mp->keep[level] = 0;
  900. goto checknest;
  901. }
  902. if (mp->keep[level] && b > buf)
  903. {
  904. *b = 0;
  905. return buf;
  906. }
  907. return 0;
  908. }
  909. /*
  910. * check english language stats
  911. */
  912. static int
  913. ckenglish(register Magic_t* mp, int pun, int badpun)
  914. {
  915. register char* s;
  916. register int vowl = 0;
  917. register int freq = 0;
  918. register int rare = 0;
  919. if (5 * badpun > pun)
  920. return 0;
  921. if (2 * mp->count[';'] > mp->count['E'] + mp->count['e'])
  922. return 0;
  923. if ((mp->count['>'] + mp->count['<'] + mp->count['/']) > mp->count['E'] + mp->count['e'])
  924. return 0;
  925. for (s = "aeiou"; *s; s++)
  926. vowl += mp->count[toupper(*s)] + mp->count[*s];
  927. for (s = "etaion"; *s; s++)
  928. freq += mp->count[toupper(*s)] + mp->count[*s];
  929. for (s = "vjkqxz"; *s; s++)
  930. rare += mp->count[toupper(*s)] + mp->count[*s];
  931. return 5 * vowl >= mp->fbsz - mp->count[' '] && freq >= 10 * rare;
  932. }
  933. /*
  934. * check programming language stats
  935. */
  936. static char*
  937. cklang(register Magic_t* mp, const char* file, char* buf, struct stat* st)
  938. {
  939. register int c;
  940. register unsigned char* b;
  941. register unsigned char* e;
  942. register int q;
  943. register char* s;
  944. char* t;
  945. char* base;
  946. char* suff;
  947. char* t1;
  948. char* t2;
  949. char* t3;
  950. int n;
  951. int badpun;
  952. int code;
  953. int pun;
  954. Cctype_t flags;
  955. Info_t* ip;
  956. b = (unsigned char*)mp->fbuf;
  957. e = b + mp->fbsz;
  958. memzero(mp->count, sizeof(mp->count));
  959. memzero(mp->multi, sizeof(mp->multi));
  960. memzero(mp->identifier, sizeof(mp->identifier));
  961. /*
  962. * check character coding
  963. */
  964. flags = 0;
  965. while (b < e)
  966. flags |= mp->cctype[*b++];
  967. b = (unsigned char*)mp->fbuf;
  968. code = 0;
  969. q = CC_ASCII;
  970. n = CC_MASK;
  971. for (c = 0; c < CC_MAPS; c++)
  972. {
  973. flags ^= CC_text;
  974. if ((flags & CC_MASK) < n)
  975. {
  976. n = flags & CC_MASK;
  977. q = c;
  978. }
  979. flags >>= CC_BIT;
  980. }
  981. flags = n;
  982. if (!(flags & (CC_binary|CC_notext)))
  983. {
  984. if (q != CC_NATIVE)
  985. {
  986. code = q;
  987. ccmaps(mp->fbuf, mp->fbsz, q, CC_NATIVE);
  988. }
  989. if (b[0] == '#' && b[1] == '!')
  990. {
  991. for (b += 2; b < e && isspace(*b); b++);
  992. for (s = (char*)b; b < e && isprint(*b); b++);
  993. c = *b;
  994. *b = 0;
  995. if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) || match(s, "/*bin*/*") || !access(s, F_OK))
  996. {
  997. if (t = strrchr(s, '/'))
  998. s = t + 1;
  999. for (t = s; *t; t++)
  1000. if (isspace(*t))
  1001. {
  1002. *t = 0;
  1003. break;
  1004. }
  1005. sfsprintf(mp->mbuf, sizeof(mp->mbuf), "application/x-%s", *s ? s : "sh");
  1006. mp->mime = mp->mbuf;
  1007. if (match(s, "*sh"))
  1008. {
  1009. t1 = T("command");
  1010. if (streq(s, "sh"))
  1011. *s = 0;
  1012. else
  1013. {
  1014. *b++ = ' ';
  1015. *b = 0;
  1016. }
  1017. }
  1018. else
  1019. {
  1020. t1 = T("interpreter");
  1021. *b++ = ' ';
  1022. *b = 0;
  1023. }
  1024. sfsprintf(mp->sbuf, sizeof(mp->sbuf), T("%s%s script"), s, t1);
  1025. s = mp->sbuf;
  1026. goto qualify;
  1027. }
  1028. *b = c;
  1029. b = (unsigned char*)mp->fbuf;
  1030. }
  1031. badpun = 0;
  1032. pun = 0;
  1033. q = 0;
  1034. s = 0;
  1035. t = 0;
  1036. while (b < e)
  1037. {
  1038. c = *b++;
  1039. mp->count[c]++;
  1040. if (c == q && (q != '*' || *b == '/' && b++))
  1041. {
  1042. mp->multi[q]++;
  1043. q = 0;
  1044. }
  1045. else if (c == '\\')
  1046. {
  1047. s = 0;
  1048. b++;
  1049. }
  1050. else if (!q)
  1051. {
  1052. if (isalpha(c) || c == '_')
  1053. {
  1054. if (!s)
  1055. s = (char*)b - 1;
  1056. }
  1057. else if (!isdigit(c))
  1058. {
  1059. if (s)
  1060. {
  1061. if (s > mp->fbuf)
  1062. switch (*(s - 1))
  1063. {
  1064. case ':':
  1065. if (*b == ':')
  1066. mp->multi[':']++;
  1067. break;
  1068. case '.':
  1069. if (((char*)b - s) == 3 && (s == (mp->fbuf + 1) || *(s - 2) == '\n'))
  1070. mp->multi['.']++;
  1071. break;
  1072. case '\n':
  1073. case '\\':
  1074. if (*b == '{')
  1075. t = (char*)b + 1;
  1076. break;
  1077. case '{':
  1078. if (s == t && *b == '}')
  1079. mp->multi['X']++;
  1080. break;
  1081. }
  1082. if (!mp->idtab)
  1083. {
  1084. if (mp->idtab = dtnew(mp->vm, &mp->dtdisc, Dthash))
  1085. for (q = 0; q < elementsof(dict); q++)
  1086. dtinsert(mp->idtab, &dict[q]);
  1087. else if (mp->disc->errorf)
  1088. (*mp->disc->errorf)(mp, mp->disc, 3, "out of space");
  1089. q = 0;
  1090. }
  1091. if (mp->idtab)
  1092. {
  1093. *(b - 1) = 0;
  1094. if (ip = (Info_t*)dtmatch(mp->idtab, s))
  1095. mp->identifier[ip->value]++;
  1096. *(b - 1) = c;
  1097. }
  1098. s = 0;
  1099. }
  1100. switch (c)
  1101. {
  1102. case '\t':
  1103. if (b == (unsigned char*)(mp->fbuf + 1) || *(b - 2) == '\n')
  1104. mp->multi['\t']++;
  1105. break;
  1106. case '"':
  1107. case '\'':
  1108. q = c;
  1109. break;
  1110. case '/':
  1111. if (*b == '*')
  1112. q = *b++;
  1113. else if (*b == '/')
  1114. q = '\n';
  1115. break;
  1116. case '$':
  1117. if (*b == '(' && *(b + 1) != ' ')
  1118. mp->multi['$']++;
  1119. break;
  1120. case '{':
  1121. case '}':
  1122. case '[':
  1123. case ']':
  1124. case '(':
  1125. mp->multi[c]++;
  1126. break;
  1127. case ')':
  1128. mp->multi[c]++;
  1129. goto punctuation;
  1130. case ':':
  1131. if (*b == ':' && isspace(*(b + 1)) && b > (unsigned char*)(mp->fbuf + 1) && isspace(*(b - 2)))
  1132. mp->multi[':']++;
  1133. goto punctuation;
  1134. case '.':
  1135. case ',':
  1136. case '%':
  1137. case ';':
  1138. case '?':
  1139. punctuation:
  1140. pun++;
  1141. if (*b != ' ' && *b != '\n')
  1142. badpun++;
  1143. break;
  1144. }
  1145. }
  1146. }
  1147. }
  1148. }
  1149. else
  1150. while (b < e)
  1151. mp->count[*b++]++;
  1152. base = (t1 = strrchr(file, '/')) ? t1 + 1 : (char*)file;
  1153. suff = (t1 = strrchr(base, '.')) ? t1 + 1 : "";
  1154. if (!flags)
  1155. {
  1156. if (match(suff, "*sh|bat|cmd"))
  1157. goto id_sh;
  1158. if (match(base, "*@(mkfile)"))
  1159. goto id_mk;
  1160. if (match(base, "*@(makefile|.mk)"))
  1161. goto id_make;
  1162. if (match(base, "*@(mamfile|.mam)"))
  1163. goto id_mam;
  1164. if (match(suff, "[cly]?(pp|xx|++)|cc|ll|yy"))
  1165. goto id_c;
  1166. if (match(suff, "f"))
  1167. goto id_fortran;
  1168. if (match(suff, "htm+(l)"))
  1169. goto id_html;
  1170. if (match(suff, "cpy"))
  1171. goto id_copybook;
  1172. if (match(suff, "cob|cbl|cb2"))
  1173. goto id_cobol;
  1174. if (match(suff, "pl[1i]"))
  1175. goto id_pl1;
  1176. if (match(suff, "tex"))
  1177. goto id_tex;
  1178. if (match(suff, "asm|s"))
  1179. goto id_asm;
  1180. if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) && (!suff || suff != strchr(suff, '.')))
  1181. {
  1182. id_sh:
  1183. s = T("command script");
  1184. mp->mime = "application/sh";
  1185. goto qualify;
  1186. }
  1187. if (strmatch(mp->fbuf, "From * [0-9][0-9]:[0-9][0-9]:[0-9][0-9] *"))
  1188. {
  1189. s = T("mail message");
  1190. mp->mime = "message/rfc822";
  1191. goto qualify;
  1192. }
  1193. if (match(base, "*@(mkfile)"))
  1194. {
  1195. id_mk:
  1196. s = "mkfile";
  1197. mp->mime = "application/mk";
  1198. goto qualify;
  1199. }
  1200. if (match(base, "*@(makefile|.mk)") || mp->multi['\t'] >= mp->count[':'] && (mp->multi['$'] > 0 || mp->multi[':'] > 0))
  1201. {
  1202. id_make:
  1203. s = "makefile";
  1204. mp->mime = "application/make";
  1205. goto qualify;
  1206. }
  1207. if (mp->multi['.'] >= 3)
  1208. {
  1209. s = T("nroff input");
  1210. mp->mime = "application/x-troff";
  1211. goto qualify;
  1212. }
  1213. if (mp->multi['X'] >= 3)
  1214. {
  1215. s = T("TeX input");
  1216. mp->mime = "application/x-tex";
  1217. goto qualify;
  1218. }
  1219. if (mp->fbsz < SF_BUFSIZE &&
  1220. (mp->multi['('] == mp->multi[')'] &&
  1221. mp->multi['{'] == mp->multi['}'] &&
  1222. mp->multi['['] == mp->multi[']']) ||
  1223. mp->fbsz >= SF_BUFSIZE &&
  1224. (mp->multi['('] >= mp->multi[')'] &&
  1225. mp->multi['{'] >= mp->multi['}'] &&
  1226. mp->multi['['] >= mp->multi[']']))
  1227. {
  1228. c = mp->identifier[ID_INCL1];
  1229. if (c >= 2 && mp->identifier[ID_INCL2] >= c && mp->identifier[ID_INCL3] >= c && mp->count['.'] >= c ||
  1230. mp->identifier[ID_C] >= 5 && mp->count[';'] >= 5 ||
  1231. mp->count['='] >= 20 && mp->count[';'] >= 20)
  1232. {
  1233. id_c:
  1234. t1 = "";
  1235. t2 = "c ";
  1236. t3 = T("program");
  1237. switch (*suff)
  1238. {
  1239. case 'c':
  1240. case 'C':
  1241. mp->mime = "application/x-cc";
  1242. break;
  1243. case 'l':
  1244. case 'L':
  1245. t1 = "lex ";
  1246. mp->mime = "application/x-lex";
  1247. break;
  1248. default:
  1249. t3 = T("header");
  1250. if (mp->identifier[ID_YACC] < 5 || mp->count['%'] < 5)
  1251. {
  1252. mp->mime = "application/x-cc";
  1253. break;
  1254. }
  1255. /*FALLTHROUGH*/
  1256. case 'y':
  1257. case 'Y':
  1258. t1 = "yacc ";
  1259. mp->mime = "application/x-yacc";
  1260. break;
  1261. }
  1262. if (mp->identifier[ID_CPLUSPLUS] >= 3)
  1263. {
  1264. t2 = "c++ ";
  1265. mp->mime = "application/x-c++";
  1266. }
  1267. sfsprintf(mp->sbuf, sizeof(mp->sbuf), "%s%s%s", t1, t2, t3);
  1268. s = mp->sbuf;
  1269. goto qualify;
  1270. }
  1271. }
  1272. if (mp->identifier[ID_MAM1] >= 2 && mp->identifier[ID_MAM3] >= 2 &&
  1273. (mp->fbsz < SF_BUFSIZE && mp->identifier[ID_MAM1] == mp->identifier[ID_MAM2] ||
  1274. mp->fbsz >= SF_BUFSIZE && mp->identifier[ID_MAM1] >= mp->identifier[ID_MAM2]))
  1275. {
  1276. id_mam:
  1277. s = T("mam program");
  1278. mp->mime = "application/x-mam";
  1279. goto qualify;
  1280. }
  1281. if (mp->identifier[ID_FORTRAN] >= 8)
  1282. {
  1283. id_fortran:
  1284. s = T("fortran program");
  1285. mp->mime = "application/x-fortran";
  1286. goto qualify;
  1287. }
  1288. if (mp->identifier[ID_HTML] > 0 && mp->count['<'] >= 8 && (c = mp->count['<'] - mp->count['>']) >= -2 && c <= 2)
  1289. {
  1290. id_html:
  1291. s = T("html input");
  1292. mp->mime = "text/html";
  1293. goto qualify;
  1294. }
  1295. if (mp->identifier[ID_COPYBOOK] > 0 && mp->identifier[ID_COBOL] == 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
  1296. {
  1297. id_copybook:
  1298. s = T("cobol copybook");
  1299. mp->mime = "application/x-cobol";
  1300. goto qualify;
  1301. }
  1302. if (mp->identifier[ID_COBOL] > 0 && mp->identifier[ID_COPYBOOK] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
  1303. {
  1304. id_cobol:
  1305. s = T("cobol program");
  1306. mp->mime = "application/x-cobol";
  1307. goto qualify;
  1308. }
  1309. if (mp->identifier[ID_PL1] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
  1310. {
  1311. id_pl1:
  1312. s = T("pl1 program");
  1313. mp->mime = "application/x-pl1";
  1314. goto qualify;
  1315. }
  1316. if (mp->count['{'] >= 6 && (c = mp->count['{'] - mp->count['}']) >= -2 && c <= 2 && mp->count['\\'] >= mp->count['{'])
  1317. {
  1318. id_tex:
  1319. s = T("TeX input");
  1320. mp->mime = "text/tex";
  1321. goto qualify;
  1322. }
  1323. if (mp->identifier[ID_ASM] >= 4)
  1324. {
  1325. id_asm:
  1326. s = T("as program");
  1327. mp->mime = "application/x-as";
  1328. goto qualify;
  1329. }
  1330. if (ckenglish(mp, pun, badpun))
  1331. {
  1332. s = T("english text");
  1333. mp->mime = "text/plain";
  1334. goto qualify;
  1335. }
  1336. }
  1337. else if (streq(base, "core"))
  1338. {
  1339. mp->mime = "x-system/core";
  1340. return T("core dump");
  1341. }
  1342. if (flags & (CC_binary|CC_notext))
  1343. {
  1344. b = (unsigned char*)mp->fbuf;
  1345. e = b + mp->fbsz;
  1346. n = 0;
  1347. for (;;)
  1348. {
  1349. c = *b++;
  1350. q = 0;
  1351. while (c & 0x80)
  1352. {
  1353. c <<= 1;
  1354. q++;
  1355. }
  1356. switch (q)
  1357. {
  1358. case 4:
  1359. if (b < e && (*b++ & 0xc0) != 0x80)
  1360. break;
  1361. case 3:
  1362. if (b < e && (*b++ & 0xc0) != 0x80)
  1363. break;
  1364. case 2:
  1365. if (b < e && (*b++ & 0xc0) != 0x80)
  1366. break;
  1367. n = 1;
  1368. case 0:
  1369. if (b >= e)
  1370. {
  1371. if (n)
  1372. {
  1373. flags &= ~(CC_binary|CC_notext);
  1374. flags |= CC_utf_8;
  1375. }
  1376. break;
  1377. }
  1378. continue;
  1379. }
  1380. break;
  1381. }
  1382. }
  1383. if (flags & (CC_binary|CC_notext))
  1384. {
  1385. unsigned long d = 0;
  1386. if ((q = mp->fbsz / UCHAR_MAX) >= 2)
  1387. {
  1388. /*
  1389. * compression/encryption via standard deviation
  1390. */
  1391. for (c = 0; c < UCHAR_MAX; c++)
  1392. {
  1393. pun = mp->count[c] - q;
  1394. d += pun * pun;
  1395. }
  1396. d /= mp->fbsz;
  1397. }
  1398. if (d <= 0)
  1399. s = T("binary");
  1400. else if (d < 4)
  1401. s = T("encrypted");
  1402. else if (d < 16)
  1403. s = T("packed");
  1404. else if (d < 64)
  1405. s = T("compressed");
  1406. else if (d < 256)
  1407. s = T("delta");
  1408. else
  1409. s = T("data");
  1410. mp->mime = "application/octet-stream";
  1411. return s;
  1412. }
  1413. mp->mime = "text/plain";
  1414. if (flags & CC_utf_8)
  1415. s = (flags & CC_control) ? T("utf-8 text with control characters") : T("utf-8 text");
  1416. else if (flags & CC_latin)
  1417. s = (flags & CC_control) ? T("latin text with control characters") : T("latin text");
  1418. else
  1419. s = (flags & CC_control) ? T("text with control characters") : T("text");
  1420. qualify:
  1421. if (!flags && mp->count['\n'] >= mp->count['\r'] && mp->count['\n'] <= (mp->count['\r'] + 1) && mp->count['\r'])
  1422. {
  1423. t = "dos ";
  1424. mp->mime = "text/dos";
  1425. }
  1426. else
  1427. t = "";
  1428. if (code)
  1429. {
  1430. if (code == CC_ASCII)
  1431. sfsprintf(buf, PATH_MAX, "ascii %s%s", t, s);
  1432. else
  1433. {
  1434. sfsprintf(buf, PATH_MAX, "ebcdic%d %s%s", code - 1, t, s);
  1435. mp->mime = "text/ebcdic";
  1436. }
  1437. s = buf;
  1438. }
  1439. else if (*t)
  1440. {
  1441. sfsprintf(buf, PATH_MAX, "%s%s", t, s);
  1442. s = buf;
  1443. }
  1444. return s;
  1445. }
  1446. /*
  1447. * return the basic magic string for file,st in buf,size
  1448. */
  1449. static char*
  1450. type(register Magic_t* mp, const char* file, struct stat* st, char* buf, int size)
  1451. {
  1452. register char* s;
  1453. register char* t;
  1454. mp->mime = 0;
  1455. if (!S_ISREG(st->st_mode))
  1456. {
  1457. if (S_ISDIR(st->st_mode))
  1458. {
  1459. mp->mime = "x-system/dir";
  1460. return T("directory");
  1461. }
  1462. if (S_ISLNK(st->st_mode))
  1463. {
  1464. mp->mime = "x-system/lnk";
  1465. s = buf;
  1466. s += sfsprintf(s, PATH_MAX, T("symbolic link to "));
  1467. if (pathgetlink(file, s, size - (s - buf)) < 0)
  1468. return T("cannot read symbolic link text");
  1469. return buf;
  1470. }
  1471. if (S_ISBLK(st->st_mode))
  1472. {
  1473. mp->mime = "x-system/blk";
  1474. sfsprintf(buf, PATH_MAX, T("block special (%s)"), fmtdev(st));
  1475. return buf;
  1476. }
  1477. if (S_ISCHR(st->st_mode))
  1478. {
  1479. mp->mime = "x-system/chr";
  1480. sfsprintf(buf, PATH_MAX, T("character special (%s)"), fmtdev(st));
  1481. return buf;
  1482. }
  1483. if (S_ISFIFO(st->st_mode))
  1484. {
  1485. mp->mime = "x-system/fifo";
  1486. return "fifo";
  1487. }
  1488. #ifdef S_ISSOCK
  1489. if (S_ISSOCK(st->st_mode))
  1490. {
  1491. mp->mime = "x-system/sock";
  1492. return "socket";
  1493. }
  1494. #endif
  1495. }
  1496. if (!(mp->fbmx = st->st_size))
  1497. s = T("empty");
  1498. else if (!mp->fp)
  1499. s = T("cannot read");
  1500. else
  1501. {
  1502. mp->fbsz = sfread(mp->fp, mp->fbuf, sizeof(mp->fbuf) - 1);
  1503. if (mp->fbsz < 0)
  1504. s = fmterror(errno);
  1505. else if (mp->fbsz == 0)
  1506. s = T("empty");
  1507. else
  1508. {
  1509. mp->fbuf[mp->fbsz] = 0;
  1510. mp->xoff = 0;
  1511. mp->xbsz = 0;
  1512. if (!(s = ckmagic(mp, file, buf, st, 0)))
  1513. s = cklang(mp, file, buf, st);
  1514. }
  1515. }
  1516. if (!mp->mime)
  1517. mp->mime = "application/unknown";
  1518. else if ((t = strchr(mp->mime, '%')) && *(t + 1) == 's' && !*(t + 2))
  1519. {
  1520. register char* b;
  1521. register char* be;
  1522. register char* m;
  1523. register char* me;
  1524. b = mp->mime;
  1525. me = (m = mp->mime = mp->fbuf) + sizeof(mp->fbuf) - 1;
  1526. while (m < me && b < t)
  1527. *m++ = *b++;
  1528. b = t = s;
  1529. for (;;)
  1530. {
  1531. if (!(be = strchr(t, ' ')))
  1532. {
  1533. be = b + strlen(b);
  1534. break;
  1535. }
  1536. if (*(be - 1) == ',' || strneq(be + 1, "data", 4) || strneq(be + 1, "file", 4))
  1537. break;
  1538. b = t;
  1539. t = be + 1;
  1540. }
  1541. while (m < me && b < be)
  1542. if ((*m++ = *b++) == ' ')
  1543. *(m - 1) = '-';
  1544. *m = 0;
  1545. }
  1546. return s;
  1547. }
  1548. /*
  1549. * low level for magicload()
  1550. */
  /*
   * parse the magic entries on stream fp and append them to the entry
   * list in mp; file is only used to label diagnostics (error_info.file)
   *
   * each line returned by sfgetr(fp, '\n', 1) is parsed into the current
   * Entry_t (ep); once a line is accepted a new Entry_t is allocated from
   * mp->vm and chained through ep->next
   * on return the new chain is linked after mp->magiclast (or becomes
   * mp->magic when there was no list yet) and the spare trailing entry
   * is freed
   *
   * problems are reported through mp->disc->errorf when it is set, some
   * of them only under MAGIC_VERBOSE; the function itself always returns 0
   */
  1551. static int
  1552. load(register Magic_t* mp, char* file, register Sfio_t* fp)
  1553. {
  1554. register Entry_t* ep;
  1555. register char* p;
  1556. register char* p2;
  1557. char* p3;
  1558. char* next;
  1559. int n;
  1560. int lge;
  1561. int lev;
  1562. int ent;
  1563. int old;
  1564. int cont;
  1565. Info_t* ip;
  1566. Entry_t* ret;
  1567. Entry_t* first;
  1568. Entry_t* last = 0;
        /* fun[c - 'a'] is the entry following the definition of function c, 0 until defined */
  1569. Entry_t* fun['z' - 'a' + 1];
  1570. memzero(fun, sizeof(fun));
  1571. cont = '$';
  1572. ent = 0;
  1573. lev = 0;
  1574. old = 0;
  1575. ret = 0;
  1576. error_info.file = file;
  1577. error_info.line = 0;
        /* ep is the entry being filled in; first heads the chain built by this call */
  1578. first = ep = vmnewof(mp->vm, 0, Entry_t, 1, 0);
  1579. while (p = sfgetr(fp, '\n', 1))
  1580. {
  1581. error_info.line++;
  1582. for (; isspace(*p); p++);
  1583. /*
  1584. * nesting
  1585. */
  1586. switch (*p)
  1587. {
        /* empty line or comment: remember '#' as the continuation for a following function entry */
  1588. case 0:
  1589. case '#':
  1590. cont = '#';
  1591. continue;
        /* open a level; lev is always counted but ep->nest is only marked below MAXNEST */
  1592. case '{':
  1593. if (++lev < MAXNEST)
  1594. ep->nest = *p;
  1595. else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
  1596. (*mp->disc->errorf)(mp, mp->disc, 1, "{ ... } operator nesting too deep -- %d max", MAXNEST);
  1597. continue;
  1598. case '}':
  1599. if (!last || lev <= 0)
  1600. {
  1601. if (mp->disc->errorf)
  1602. (*mp->disc->errorf)(mp, mp->disc, 2, "`%c': invalid nesting", *p);
  1603. }
        /* closing the level that opened the current function body (ent): emit its [RETURN] entry */
  1604. else if (lev-- == ent)
  1605. {
  1606. ent = 0;
  1607. ep->cont = ':';
  1608. ep->offset = ret->offset;
  1609. ep->nest = ' ';
  1610. ep->type = ' ';
  1611. ep->op = ' ';
  1612. ep->desc = "[RETURN]";
  1613. last = ep;
  1614. ep = ret->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
  1615. ret = 0;
  1616. }
  1617. else
  1618. last->nest = *p;
  1619. continue;
  1620. default:
        /* <c>{ starts the definition of function c; <c>( (c not one of + > & |) is a call */
  1621. if (*(p + 1) == '{' || *(p + 1) == '(' && *p != '+' && *p != '>' && *p != '&' && *p != '|')
  1622. {
  1623. n = *p++;
  1624. if (n >= 'a' && n <= 'z')
  1625. n -= 'a';
  1626. else
  1627. {
  1628. if (mp->disc->errorf)
  1629. (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n);
  1630. n = 0;
  1631. }
  1632. if (ret && mp->disc->errorf)
  1633. (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a');
  1634. if (*p == '{')
  1635. {
  1636. ent = ++lev;
  1637. ret = ep;
  1638. ep->desc = "[FUNCTION]";
  1639. }
  1640. else
  1641. {
  1642. if (*(p + 1) != ')' && mp->disc->errorf)
  1643. (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function call argument list", n + 'a');
  1644. ep->desc = "[CALL]";
  1645. }
  1646. ep->cont = cont;
  1647. ep->offset = n;
  1648. ep->nest = ' ';
  1649. ep->type = ' ';
  1650. ep->op = ' ';
  1651. last = ep;
  1652. ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
        /* with a function open (ret set) the new entry becomes fun[n]; otherwise the label is looked up in fun[] */
  1653. if (ret)
  1654. fun[n] = last->value.lab = ep;
  1655. else if (!(last->value.lab = fun[n]) && mp->disc->errorf)
  1656. (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a');
  1657. continue;
  1658. }
        /* unmarked entry: digit for the current level (one less when a function is open), ' ' at level 0 or at the function's own level */
  1659. if (!ep->nest)
  1660. ep->nest = (lev > 0 && lev != ent) ? ('0' + lev - !!ent) : ' ';
  1661. break;
  1662. }
  1663. /*
  1664. * continuation
  1665. */
  1666. cont = '$';
  1667. switch (*p)
  1668. {
  1669. case '>':
  1670. old = 1;
  1671. if (*(p + 1) == *p)
  1672. {
  1673. /*
  1674. * old style nesting push
  1675. */
  1676. p++;
  1677. old = 2;
  1678. if (!lev && last)
  1679. {
  1680. lev = 1;
  1681. last->nest = '{';
  1682. if (last->cont == '>')
  1683. last->cont = '&';
  1684. ep->nest = '1';
  1685. }
  1686. }
  1687. /*FALLTHROUGH*/
  1688. case '+':
  1689. case '&':
  1690. case '|':
  1691. ep->cont = *p++;
  1692. break;
  1693. default:
  1694. if ((mp->flags & MAGIC_VERBOSE) && !isalpha(*p) && mp->disc->errorf)
  1695. (*mp->disc->errorf)(mp, mp->disc, 1, "`%c': invalid line continuation operator", *p);
  1696. /*FALLTHROUGH*/
  1697. case '*':
  1698. case '0': case '1': case '2': case '3': case '4':
  1699. case '5': case '6': case '7': case '8': case '9':
  1700. ep->cont = (lev > 0) ? '&' : '#';
  1701. break;
  1702. }
        /* old is 1 after a '>' line and 2 after a '>>' line; 2 decays to 1 here and is acted on by a later line */
  1703. switch (old)
  1704. {
  1705. case 1:
  1706. old = 0;
  1707. if (lev)
  1708. {
  1709. /*
  1710. * old style nesting pop
  1711. */
  1712. lev = 0;
  1713. if (last)
  1714. last->nest = '}';
  1715. ep->nest = ' ';
  1716. if (ep->cont == '&')
  1717. ep->cont = '#';
  1718. }
  1719. break;
  1720. case 2:
  1721. old = 1;
  1722. break;
  1723. }
  1724. if (isdigit(*p))
  1725. {
  1726. /*
  1727. * absolute offset
  1728. */
  1729. ep->offset = strton(p, &next, NiL, 0);
  1730. p2 = next;
  1731. }
  1732. else
  1733. {
  1734. for (p2 = p; *p2 && !isspace(*p2); p2++);
  1735. if (!*p2)
  1736. {
  1737. if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
  1738. (*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p);
  1739. continue;
  1740. }
  1741. /*
  1742. * offset expression
  1743. */
  1744. *p2++ = 0;
  1745. ep->expr = vmstrdup(mp->vm, p);
        /* a name is looked up in mp->infotab; an unknown name yields offset 0 */
  1746. if (isalpha(*p))
  1747. ep->offset = (ip = (Info_t*)dtmatch(mp->infotab, p)) ? ip->value : 0;
  1748. else if (*p == '(' && ep->cont == '>')
  1749. {
  1750. /*
  1751. * convert old style indirection to @
  1752. */
  1753. p = ep->expr + 1;
  1754. for (;;)
  1755. {
  1756. switch (*p++)
  1757. {
  1758. case 0:
  1759. case '@':
  1760. case '(':
  1761. break;
  1762. case ')':
  1763. break;
  1764. default:
  1765. continue;
  1766. }
  1767. break;
  1768. }
  1769. if (*--p == ')')
  1770. {
  1771. *p = 0;
  1772. *ep->expr = '@';
  1773. }
  1774. }
  1775. }
        /* isolate the type field */
  1776. for (; isspace(*p2); p2++);
  1777. for (p = p2; *p2 && !isspace(*p2); p2++);
  1778. if (!*p2)
  1779. {
  1780. if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
  1781. (*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p);
  1782. continue;
  1783. }
  1784. *p2++ = 0;
  1785. /*
  1786. * type
  1787. */
        /* a "be"/"le" prefix sets ep->swap to ~0 / ~7 and is then skipped */
  1788. if ((*p == 'b' || *p == 'l') && *(p + 1) == 'e')
  1789. {
  1790. ep->swap = ~(*p == 'l' ? 7 : 0);
  1791. p += 2;
  1792. }
        /* "sh..." maps to 'h', any other "s..." and "a..." map to 's', everything else to its first letter */
  1793. if (*p == 's')
  1794. {
  1795. if (*(p + 1) == 'h')
  1796. ep->type = 'h';
  1797. else
  1798. ep->type = 's';
  1799. }
  1800. else if (*p == 'a')
  1801. ep->type = 's';
  1802. else
  1803. ep->type = *p;
  1804. if (p = strchr(p, '&'))
  1805. {
  1806. /*
  1807. * old style mask
  1808. */
  1809. ep->mask = strton(++p, NiL, NiL, 0);
  1810. }
  1811. for (; isspace(*p2); p2++);
        /* a mask was given: back up one character and plant '=' in front of the operand */
  1812. if (ep->mask)
  1813. *--p2 = '=';
  1814. /*
  1815. * comparison operation
  1816. */
  1817. p = p2;
  1818. if (p2 = strchr(p, '\t'))
  1819. *p2++ = 0;
  1820. else
  1821. {
  1822. int qe = 0;
  1823. int qn = 0;
  1824. /*
  1825. * assume balanced {}[]()\\""'' field
  1826. */
  1827. for (p2 = p;;)
  1828. {
  1829. switch (n = *p2++)
  1830. {
  1831. case 0:
  1832. break;
  1833. case '{':
  1834. if (!qe)
  1835. qe = '}';
  1836. if (qe == '}')
  1837. qn++;
  1838. continue;
  1839. case '(':
  1840. if (!qe)
  1841. qe = ')';
  1842. if (qe == ')')
  1843. qn++;
  1844. continue;
  1845. case '[':
  1846. if (!qe)
  1847. qe = ']';
  1848. if (qe == ']')
  1849. qn++;
  1850. continue;
  1851. case '}':
  1852. case ')':
  1853. case ']':
  1854. if (qe == n && qn > 0)
  1855. qn--;
  1856. continue;
  1857. case '"':
  1858. case '\'':
  1859. if (!qe)
  1860. qe = n;
  1861. else if (qe == n)
  1862. qe = 0;
  1863. continue;
  1864. case '\\':
  1865. if (*p2)
  1866. p2++;
  1867. continue;
  1868. default:
  1869. if (!qe && isspace(n))
  1870. break;
  1871. continue;
  1872. }
        /* only reached at the end of the field: terminate it, or step back onto the string's 0 */
  1873. if (n)
  1874. *(p2 - 1) = 0;
  1875. else
  1876. p2--;
  1877. break;
  1878. }
  1879. }
        /* lge adjusts the numeric operand: -1 for ">=", +1 for "<=" */
  1880. lge = 0;
  1881. if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
  1882. ep->op = '=';
  1883. else
  1884. {
  1885. if (*p == '&')
  1886. {
  1887. ep->mask = strton(++p, &next, NiL, 0);
  1888. p = next;
  1889. }
  1890. switch (*p)
  1891. {
  1892. case '=':
  1893. case '>':
  1894. case '<':
  1895. case '*':
  1896. ep->op = *p++;
  1897. if (*p == '=')
  1898. {
  1899. p++;
  1900. switch (ep->op)
  1901. {
  1902. case '>':
  1903. lge = -1;
  1904. break;
  1905. case '<':
  1906. lge = 1;
  1907. break;
  1908. }
  1909. }
  1910. break;
  1911. case '!':
  1912. case '@':
  1913. ep->op = *p++;
  1914. if (*p == '=')
  1915. p++;
  1916. break;
  1917. case 'x':
  1918. p++;
  1919. ep->op = '*';
  1920. break;
  1921. default:
  1922. ep->op = '=';
  1923. if (ep->mask)
  1924. ep->value.num = ep->mask;
  1925. break;
  1926. }
  1927. }
        /* parse the operand unless the op is '*' or value.num was already taken from the mask */
  1928. if (ep->op != '*' && !ep->value.num)
  1929. {
  1930. if (ep->type == 'e')
  1931. {
  1932. if (ep->value.sub = vmnewof(mp->vm, 0, regex_t, 1, 0))
  1933. {
  1934. ep->value.sub->re_disc = &mp->redisc;
  1935. if (!(n = regcomp(ep->value.sub, p, REG_DELIMITED|REG_LENIENT|REG_NULL|REG_DISCIPLINE)))
  1936. {
  1937. p += ep->value.sub->re_npat;
  1938. if (!(n = regsubcomp(ep->value.sub, p, NiL, 0, 0)))
  1939. p += ep->value.sub->re_npat;
  1940. }
  1941. if (n)
  1942. {
  1943. regmessage(mp, ep->value.sub, n);
  1944. ep->value.sub = 0;
  1945. }
  1946. else if (*p && mp->disc->errorf)
  1947. (*mp->disc->errorf)(mp, mp->disc, 1, "invalid characters after substitution: %s", p);
  1948. }
  1949. }
  1950. else if (ep->type == 'm')
  1951. {
        /* NOTE(review): stresc() appears to convert escapes in p in place and return the new length -- confirm */
  1952. ep->mask = stresc(p) + 1;
  1953. ep->value.str = vmnewof(mp->vm, 0, char, ep->mask + 1, 0);
  1954. memcpy(ep->value.str, p, ep->mask);
        /* no offset expression or a zero offset, and not a !(...) pattern: overwrite the last copied byte with '*' */
  1955. if ((!ep->expr || !ep->offset) && !strmatch(ep->value.str, "\\!\\(*\\)"))
  1956. ep->value.str[ep->mask - 1] = '*';
  1957. }
  1958. else if (ep->type == 's')
  1959. {
  1960. ep->mask = stresc(p);
  1961. ep->value.str = vmnewof(mp->vm, 0, char, ep->mask, 0);
  1962. memcpy(ep->value.str, p, ep->mask);
  1963. }
  1964. else if (*p == '\'')
  1965. {
        /* character constant: the operand is the byte after the opening quote */
  1966. stresc(p);
  1967. ep->value.num = *(unsigned char*)(p + 1) + lge;
  1968. }
  1969. else if (strmatch(p, "+([a-z])\\(*\\)"))
  1970. {
  1971. char* t;
        /* name(...) operand: the entry becomes type 'V' and the first letter of the name selects the operation */
  1972. t = p;
  1973. ep->type = 'V';
  1974. ep->op = *p;
  1975. while (*p && *p++ != '(');
  1976. switch (ep->op)
  1977. {
  1978. case 'l':
        /* first argument is a function letter, followed by comma separated start and size */
  1979. n = *p++;
  1980. if (n < 'a' || n > 'z')
  1981. {
  1982. if (mp->disc->errorf)
  1983. (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n);
  1984. }
  1985. else if (!fun[n -= 'a'])
  1986. {
  1987. if (mp->disc->errorf)
  1988. (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a');
  1989. }
  1990. else
  1991. {
  1992. ep->value.loop = vmnewof(mp->vm, 0, Loop_t, 1, 0);
  1993. ep->value.loop->lab = fun[n];
  1994. while (*p && *p++ != ',');
  1995. ep->value.loop->start = strton(p, &t, NiL, 0);
  1996. while (*t && *t++ != ',');
  1997. ep->value.loop->size = strton(t, &t, NiL, 0);
  1998. }
  1999. break;
  2000. case 'm':
  2001. case 'r':
        /* NOTE(review): ep->desc is assigned again in the file description step below -- confirm this 32 byte buffer is still needed */
  2002. ep->desc = vmnewof(mp->vm, 0, char, 32, 0);
  2003. ep->mime = vmnewof(mp->vm, 0, char, 32, 0);
  2004. break;
  2005. case 'v':
  2006. break;
  2007. default:
  2008. if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
  2009. (*mp->disc->errorf)(mp, mp->disc, 1, "%-.*s: unknown function", p - t, t);
  2010. break;
  2011. }
  2012. }
  2013. else
  2014. {
  2015. ep->value.num = strton(p, NiL, NiL, 0) + lge;
  2016. if (ep->op == '@')
  2017. ep->value.num = swapget(0, (char*)&ep->value.num, sizeof(ep->value.num));
  2018. }
  2019. }
  2020. /*
  2021. * file description
  2022. */
  2023. if (p2)
  2024. {
  2025. for (; isspace(*p2); p2++);
  2026. if (p = strchr(p2, '\t'))
  2027. {
  2028. /*
  2029. * check for message catalog index
  2030. */
  2031. *p++ = 0;
        /* a field of the form <alnum...>:<digits> is dropped and the next tab separated field becomes the description */
  2032. if (isalpha(*p2))
  2033. {
  2034. for (p3 = p2; isalnum(*p3); p3++);
  2035. if (*p3++ == ':')
  2036. {
  2037. for (; isdigit(*p3); p3++);
  2038. if (!*p3)
  2039. {
  2040. for (p2 = p; isspace(*p2); p2++);
  2041. if (p = strchr(p2, '\t'))
  2042. *p++ = 0;
  2043. }
  2044. }
  2045. }
  2046. }
  2047. stresc(p2);
  2048. ep->desc = vmstrdup(mp->vm, p2);
        /* whatever follows the description's tab, if non-empty, is the mime type */
  2049. if (p)
  2050. {
  2051. for (; isspace(*p); p++);
  2052. if (*p)
  2053. ep->mime = vmstrdup(mp->vm, p);
  2054. }
  2055. }
  2056. else
  2057. ep->desc = "";
  2058. /*
  2059. * get next entry
  2060. */
  2061. last = ep;
  2062. ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
  2063. }
        /* terminate the new chain and splice it onto the list kept in mp */
  2064. if (last)
  2065. {
  2066. last->next = 0;
  2067. if (mp->magiclast)
  2068. mp->magiclast->next = first;
  2069. else
  2070. mp->magic = first;
  2071. mp->magiclast = last;
  2072. }
        /* ep is the spare entry allocated for a line that never came */
  2073. vmfree(mp->vm, ep);
  2074. if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
  2075. {
  2076. if (lev < 0)
  2077. (*mp->disc->errorf)(mp, mp->disc, 1, "too many } operators");
  2078. else if (lev > 0)
  2079. (*mp->disc->errorf)(mp, mp->disc, 1, "not enough } operators");
  2080. if (ret)
  2081. (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a');
  2082. }
  2083. error_info.file = 0;
  2084. error_info.line = 0;
  2085. return 0;
  2086. }
  2087. /*
  2088. * load a magic file into mp
  2089. */
  2090. int
  2091. magicload(register Magic_t* mp, const char* file, unsigned long flags)
  2092. {
  2093. register char* s;
  2094. register char* e;
  2095. register char* t;
  2096. int n;
  2097. int found;
  2098. int list;
  2099. Sfio_t* fp;
  2100. mp->flags = mp->disc->flags | flags;
  2101. found = 0;
  2102. if (list = !(s = (char*)file) || !*s || (*s == '-' || *s == '.') && !*(s + 1))
  2103. {
  2104. if (!(s = getenv(MAGIC_FILE_ENV)) || !*s)
  2105. s = MAGIC_FILE;
  2106. }
  2107. for (;;)
  2108. {
  2109. if (!list)
  2110. e = 0;
  2111. else if (e = strchr(s, ':'))
  2112. {
  2113. /*
  2114. * ok, so ~ won't work for the last list element
  2115. * we do it for MAGIC_FILES_ENV anyway
  2116. */
  2117. if ((strneq(s, "~/", n = 2) || strneq(s, "$HOME/", n = 6) || strneq(s, "${HOME}/", n = 8)) && (t = getenv("HOME")))
  2118. {
  2119. sfputr(mp->tmp, t, -1);
  2120. s += n - 1;
  2121. }
  2122. sfwrite(mp->tmp, s, e - s);
  2123. if (!(s = sfstruse(mp->tmp)))
  2124. goto nospace;
  2125. }
  2126. if (!*s || streq(s, "-"))
  2127. s = MAGIC_FILE;
  2128. if (!(fp = sfopen(NiL, s, "r")))
  2129. {
  2130. if (list)
  2131. {
  2132. if (!(t = pathpath(mp->fbuf, s, "", PATH_REGULAR|PATH_READ)) && !strchr(s, '/'))
  2133. {
  2134. strcpy(mp->fbuf, s);
  2135. sfprintf(mp->tmp, "%s/%s", MAGIC_DIR, mp->fbuf);
  2136. if (!(s = sfstruse(mp->tmp)))
  2137. goto nospace;
  2138. if (!(t = pathpath(mp->fbuf, s, "", PATH_REGULAR|PATH_READ)))
  2139. goto next;
  2140. }
  2141. if (!(fp = sfopen(NiL, t, "r")))
  2142. goto next;
  2143. }
  2144. else
  2145. {
  2146. if (mp->disc->errorf)
  2147. (*mp->disc->errorf)(mp, mp->disc, 3, "%s: cannot open magic file", s);
  2148. return -1;
  2149. }
  2150. }
  2151. found = 1;
  2152. n = load(mp, s, fp);
  2153. sfclose(fp);
  2154. if (n && !list)
  2155. return -1;
  2156. next:
  2157. if (!e)
  2158. break;
  2159. s = e + 1;
  2160. }
  2161. if (!found)
  2162. {
  2163. if (mp->flags & MAGIC_VERBOSE)
  2164. {
  2165. if (mp->disc->errorf)
  2166. (*mp->disc->errorf)(mp, mp->disc, 2, "cannot find magic file");
  2167. }
  2168. return -1;
  2169. }
  2170. return 0;
  2171. nospace:
  2172. if (mp->disc->errorf)
  2173. (*mp->disc->errorf)(mp, mp->disc, 3, "out of space");
  2174. return -1;
  2175. }
  2176. /*
  2177. * open a magic session
  2178. */
  2179. Magic_t*
  2180. magicopen(Magicdisc_t* disc)
  2181. {
  2182. register Magic_t* mp;
  2183. register int i;
  2184. register int n;
  2185. register int f;
  2186. register int c;
  2187. register Vmalloc_t* vm;
  2188. unsigned char* map[CC_MAPS + 1];
  2189. if (!(vm = vmopen(Vmdcheap, Vmbest, 0)))
  2190. return 0;
  2191. if (!(mp = vmnewof(vm, 0, Magic_t, 1, 0)))
  2192. {
  2193. vmclose(vm);
  2194. return 0;
  2195. }
  2196. mp->id = lib;
  2197. mp->disc = disc;
  2198. mp->vm = vm;
  2199. mp->flags = disc->flags;
  2200. mp->redisc.re_version = REG_VERSION;
  2201. mp->redisc.re_flags = REG_NOFREE;
  2202. mp->redisc.re_errorf = (regerror_t)disc->errorf;
  2203. mp->redisc.re_resizef = (regresize_t)vmgetmem;
  2204. mp->redisc.re_resizehandle = (void*)mp->vm;
  2205. mp->dtdisc.key = offsetof(Info_t, name);
  2206. mp->dtdisc.link = offsetof(Info_t, link);
  2207. if (!(mp->tmp = sfstropen()) || !(mp->infotab = dtnew(mp->vm, &mp->dtdisc, Dthash)))
  2208. goto bad;
  2209. for (n = 0; n < elementsof(info); n++)
  2210. dtinsert(mp->infotab, &info[n]);
  2211. for (i = 0; i < CC_MAPS; i++)
  2212. map[i] = ccmap(i, CC_ASCII);
  2213. mp->x2n = ccmap(CC_ALIEN, CC_NATIVE);
  2214. for (n = 0; n <= UCHAR_MAX; n++)
  2215. {
  2216. f = 0;
  2217. i = CC_MAPS;
  2218. while (--i >= 0)
  2219. {
  2220. c = ccmapchr(map[i], n);
  2221. f = (f << CC_BIT) | CCTYPE(c);
  2222. }
  2223. mp->cctype[n] = f;
  2224. }
  2225. return mp;
  2226. bad:
  2227. magicclose(mp);
  2228. return 0;
  2229. }
  2230. /*
  2231. * close a magicopen() session
  2232. */
  2233. int
  2234. magicclose(register Magic_t* mp)
  2235. {
  2236. if (!mp)
  2237. return -1;
  2238. if (mp->tmp)