/usr/src/lib/libast/common/misc/magic.c
C | 2347 lines | 2221 code | 67 blank | 59 comment | 338 complexity | 9b0aff5ac94817988fa319976cfcca68 MD5 | raw file
Possible License(s): GPL-3.0, LGPL-2.1, MPL-2.0-no-copyleft-exception, AGPL-3.0, BSD-3-Clause, LGPL-2.0, 0BSD, BSD-2-Clause, BSD-3-Clause-No-Nuclear-License-2014, AGPL-1.0, GPL-2.0
- /***********************************************************************
- * *
- * This software is part of the ast package *
- * Copyright (c) 1985-2010 AT&T Intellectual Property *
- * and is licensed under the *
- * Common Public License, Version 1.0 *
- * by AT&T Intellectual Property *
- * *
- * A copy of the License is available at *
- * http://www.opensource.org/licenses/cpl1.0.txt *
- * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
- * *
- * Information and Software Systems Research *
- * AT&T Research *
- * Florham Park NJ *
- * *
- * Glenn Fowler <gsf@research.att.com> *
- * David Korn <dgk@research.att.com> *
- * Phong Vo <kpv@research.att.com> *
- * *
- ***********************************************************************/
- #pragma prototyped
- /*
- * Glenn Fowler
- * AT&T Research
- *
- * library interface to file
- *
- * the sum of the hacks {s5,v10,planix} is _____ than the parts
- */
- static const char id[] = "\n@(#)$Id: magic library (AT&T Research) 2008-09-10 $\0\n"; /* embedded library identification string */
- static const char lib[] = "libast:magic"; /* library name handed to ERROR_translate() by the T() macro */
- #include <ast.h>
- #include <ctype.h>
- #include <ccode.h>
- #include <dt.h>
- #include <modex.h>
- #include <error.h>
- #include <regex.h>
- #include <swap.h>
- #define T(m) (*m?ERROR_translate(NiL,NiL,lib,m):m) /* pass non-empty message m through ERROR_translate(); empty strings are returned as is */
- #define match(s,p) strgrpmatch(s,p,NiL,0,STR_LEFT|STR_RIGHT|STR_ICASE) /* strgrpmatch() of s against pattern p with the LEFT, RIGHT and ICASE flags */
- #define MAXNEST 10 /* { ... } nesting limit; also sizes the ckmagic keep/cap/msg/ret stacks */
- #define MINITEM 4 /* magic buffer rounding */
- typedef struct /* identifier dictionary entry; element type of dict[] and info[] */
- {
- const char name[16]; /* identifier name */
- int value; /* identifier value: an ID_* code in dict[], an INFO_* code in info[] */
- Dtlink_t link; /* dictionary link */
- } Info_t;
- typedef struct Edit /* edit substitution; singly linked list node */
- {
- struct Edit* next; /* next in list */
- regex_t* from; /* from pattern (compiled regex) */
- } Edit_t;
- struct Entry; /* forward declaration: Loop_t points at entries before Entry_t is defined */
- typedef struct /* loop info; used by the ckmagic 'l' operation */
- {
- struct Entry* lab; /* call this function: entry ckmagic jumps to for each iteration */
- int start; /* start here: data offset used for the first iteration */
- int size; /* increment by this amount: added to the data offset for each later iteration */
- int count; /* dynamic loop count: iterations remaining, 0 when the loop is not active */
- int offset; /* dynamic offset: caller's data offset saved on entry and restored when the loop ends */
- } Loop_t;
- typedef struct Entry /* magic file entry; ckmagic walks these through the next links */
- {
- struct Entry* next; /* next in list */
- char* expr; /* offset expression; when set, offset is 0 (evaluate with strexpr) or an INFO_* code */
- union
- {
- unsigned long num; /* numeric comparison operand */
- char* str; /* string or pattern for the s, m, M and S types */
- struct Entry* lab; /* function entry for the '$' call continuation */
- regex_t* sub; /* compiled regex for the e and E types */
- Loop_t* loop; /* loop state for the 'l' operation */
- } value; /* comparison value */
- char* desc; /* file description; used as an sfsprintf format by ckmagic */
- char* mime; /* file mime type */
- unsigned long offset; /* offset in bytes */
- unsigned long mask; /* mask before compare; for the string types it is the byte length compared */
- char cont; /* continuation operation: ckmagic handles '#', '$', ':', '|' and '&' */
- char type; /* datum type */
- char op; /* comparison operation */
- char nest; /* { or } nesting operation */
- char swap; /* forced swap order */
- } Entry_t;
- #define CC_BIT 5 /* type bits per code set; a Cctype_t packs CC_MAPS groups of CC_BIT bits */
- #if (CC_MAPS*CC_BIT) <= (CHAR_BIT*2) /* choose the narrowest type that holds all the groups */
- typedef unsigned short Cctype_t;
- #else
- typedef unsigned long Cctype_t;
- #endif
- #define CC_text 0x01
- #define CC_control 0x02
- #define CC_latin 0x04
- #define CC_binary 0x08
- #define CC_utf_8 0x10 /* set by cklang when binary-looking data checks out as utf-8; not part of CC_MASK */
- #define CC_notext CC_text /* CC_text is flipped before checking */
- #define CC_MASK (CC_binary|CC_latin|CC_control|CC_text)
- #define CCTYPE(c) (((c)>0240)?CC_binary:((c)>=0200)?CC_latin:((c)<040&&(c)!=007&&(c)!=011&&(c)!=012&&(c)!=013&&(c)!=015)?CC_control:CC_text) /* classify byte value c: above 0240 binary, 0200..0240 latin, below 040 except 007 011 012 013 015 control, otherwise text */
- #define ID_NONE 0 /* ID_*: identifier classes; cklang counts dict[] hits per class in identifier[] */
- #define ID_ASM 1
- #define ID_C 2
- #define ID_COBOL 3
- #define ID_COPYBOOK 4
- #define ID_CPLUSPLUS 5
- #define ID_FORTRAN 6
- #define ID_HTML 7
- #define ID_INCL1 8
- #define ID_INCL2 9
- #define ID_INCL3 10
- #define ID_MAM1 11
- #define ID_MAM2 12
- #define ID_MAM3 13
- #define ID_NOTEXT 14
- #define ID_PL1 15
- #define ID_YACC 16
- #define ID_MAX ID_YACC /* largest ID_* value; identifier[] has ID_MAX + 1 slots */
- #define INFO_atime 1 /* INFO_*: stat derived values; ckmagic switches on these when an entry has an expr */
- #define INFO_blocks 2
- #define INFO_ctime 3
- #define INFO_fstype 4
- #define INFO_gid 5
- #define INFO_mode 6
- #define INFO_mtime 7
- #define INFO_name 8
- #define INFO_nlink 9
- #define INFO_size 10
- #define INFO_uid 11
- #define _MAGIC_PRIVATE_ /* private member list; defined ahead of the magic.h include below */ \
- Magicdisc_t* disc; /* discipline */ \
- Vmalloc_t* vm; /* vmalloc region */ \
- Entry_t* magic; /* parsed magic table */ \
- Entry_t* magiclast; /* last entry in magic */ \
- char* mime; /* MIME type */ \
- unsigned char* x2n; /* CC_ALIEN=>CC_NATIVE */ \
- char fbuf[SF_BUFSIZE + 1]; /* file data */ \
- char xbuf[SF_BUFSIZE + 1]; /* indirect file data */ \
- char nbuf[256]; /* !CC_NATIVE data */ \
- char mbuf[64]; /* mime string */ \
- char sbuf[64]; /* type suffix string */ \
- char tbuf[2 * PATH_MAX]; /* type string */ \
- Cctype_t cctype[UCHAR_MAX + 1]; /* char code types */ \
- unsigned int count[UCHAR_MAX + 1]; /* char frequency count */ \
- unsigned int multi[UCHAR_MAX + 1]; /* multi char count */ \
- int keep[MAXNEST]; /* ckmagic nest stack */ \
- char* cap[MAXNEST]; /* ckmagic mime stack */ \
- char* msg[MAXNEST]; /* ckmagic text stack */ \
- Entry_t* ret[MAXNEST]; /* ckmagic return stack */ \
- int fbsz; /* fbuf size */ \
- int fbmx; /* fbuf max size */ \
- int xbsz; /* xbuf size */ \
- int swap; /* swap() operation */ \
- unsigned long flags; /* disc+open flags */ \
- long xoff; /* xbuf offset */ \
- int identifier[ID_MAX + 1]; /* Info_t identifier */ \
- Sfio_t* fp; /* fbuf fp */ \
- Sfio_t* tmp; /* tmp string */ \
- regdisc_t redisc; /* regex discipline */ \
- Dtdisc_t dtdisc; /* dict discipline */ \
- Dt_t* idtab; /* identifier dict */ \
- Dt_t* infotab; /* info keyword dict */
- #include <magic.h>
- static Info_t dict[] = /* keyword dictionary: identifier => ID_* class; cklang inserts these into idtab on first use */
- {
- { "COMMON", ID_FORTRAN },
- { "COMPUTE", ID_COBOL },
- { "COMP", ID_COPYBOOK },
- { "COMPUTATIONAL",ID_COPYBOOK },
- { "DCL", ID_PL1 },
- { "DEFINED", ID_PL1 },
- { "DIMENSION", ID_FORTRAN },
- { "DIVISION", ID_COBOL },
- { "FILLER", ID_COPYBOOK },
- { "FIXED", ID_PL1 },
- { "FUNCTION", ID_FORTRAN },
- { "HTML", ID_HTML },
- { "INTEGER", ID_FORTRAN },
- { "MAIN", ID_PL1 },
- { "OPTIONS", ID_PL1 },
- { "PERFORM", ID_COBOL },
- { "PIC", ID_COPYBOOK },
- { "REAL", ID_FORTRAN },
- { "REDEFINES", ID_COPYBOOK },
- { "S9", ID_COPYBOOK },
- { "SECTION", ID_COBOL },
- { "SELECT", ID_COBOL },
- { "SUBROUTINE", ID_FORTRAN },
- { "TEXT", ID_ASM },
- { "VALUE", ID_COPYBOOK },
- { "attr", ID_MAM3 },
- { "binary", ID_YACC },
- { "block", ID_FORTRAN },
- { "bss", ID_ASM },
- { "byte", ID_ASM },
- { "char", ID_C },
- { "class", ID_CPLUSPLUS },
- { "clr", ID_NOTEXT },
- { "comm", ID_ASM },
- { "common", ID_FORTRAN },
- { "data", ID_ASM },
- { "dimension", ID_FORTRAN },
- { "done", ID_MAM2 },
- { "double", ID_C },
- { "even", ID_ASM },
- { "exec", ID_MAM3 },
- { "extern", ID_C },
- { "float", ID_C },
- { "function", ID_FORTRAN },
- { "globl", ID_ASM },
- { "h", ID_INCL3 },
- { "html", ID_HTML },
- { "include", ID_INCL1 },
- { "int", ID_C },
- { "integer", ID_FORTRAN },
- { "jmp", ID_NOTEXT },
- { "left", ID_YACC },
- { "libc", ID_INCL2 },
- { "long", ID_C },
- { "make", ID_MAM1 },
- { "mov", ID_NOTEXT },
- { "private", ID_CPLUSPLUS },
- { "public", ID_CPLUSPLUS },
- { "real", ID_FORTRAN },
- { "register", ID_C },
- { "right", ID_YACC },
- { "sfio", ID_INCL2 },
- { "static", ID_C },
- { "stdio", ID_INCL2 },
- { "struct", ID_C },
- { "subroutine", ID_FORTRAN },
- { "sys", ID_NOTEXT },
- { "term", ID_YACC },
- { "text", ID_ASM },
- { "tst", ID_NOTEXT },
- { "type", ID_YACC },
- { "typedef", ID_C },
- { "u", ID_INCL2 },
- { "union", ID_YACC },
- { "void", ID_C },
- };
- static Info_t info[] = /* stat info keyword dictionary: keyword name => INFO_* code */
- {
- { "atime", INFO_atime },
- { "blocks", INFO_blocks },
- { "ctime", INFO_ctime },
- { "fstype", INFO_fstype },
- { "gid", INFO_gid },
- { "mode", INFO_mode },
- { "mtime", INFO_mtime },
- { "name", INFO_name },
- { "nlink", INFO_nlink },
- { "size", INFO_size },
- { "uid", INFO_uid },
- };
- /*
- * return pointer to data at offset off and size siz
- */
- static char*
- getdata(register Magic_t* mp, register long off, register int siz)
- {
- register long n;
- if (off < 0)
- return 0;
- if (off + siz <= mp->fbsz)
- return mp->fbuf + off;
- if (off < mp->xoff || off + siz > mp->xoff + mp->xbsz)
- {
- if (off + siz > mp->fbmx)
- return 0;
- n = (off / (SF_BUFSIZE / 2)) * (SF_BUFSIZE / 2);
- if (sfseek(mp->fp, n, SEEK_SET) != n)
- return 0;
- if ((mp->xbsz = sfread(mp->fp, mp->xbuf, sizeof(mp->xbuf) - 1)) < 0)
- {
- mp->xoff = 0;
- mp->xbsz = 0;
- return 0;
- }
- mp->xbuf[mp->xbsz] = 0;
- mp->xoff = n;
- if (off + siz > mp->xoff + mp->xbsz)
- return 0;
- }
- return mp->xbuf + off - mp->xoff;
- }
- /*
- * @... evaluator for strexpr()
- */
- static long
- indirect(const char* cs, char** e, void* handle)
- {
- register char* s = (char*)cs;
- register Magic_t* mp = (Magic_t*)handle;
- register long n = 0;
- register char* p;
- if (s)
- {
- if (*s == '@')
- {
- n = *++s == '(' ? strexpr(s, e, indirect, mp) : strtol(s, e, 0);
- switch (*(s = *e))
- {
- case 'b':
- case 'B':
- s++;
- if (p = getdata(mp, n, 1))
- n = *(unsigned char*)p;
- else
- s = (char*)cs;
- break;
- case 'h':
- case 'H':
- s++;
- if (p = getdata(mp, n, 2))
- n = swapget(mp->swap, p, 2);
- else
- s = (char*)cs;
- break;
- case 'q':
- case 'Q':
- s++;
- if (p = getdata(mp, n, 8))
- n = swapget(mp->swap, p, 8);
- else
- s = (char*)cs;
- break;
- default:
- if (isalnum(*s))
- s++;
- if (p = getdata(mp, n, 4))
- n = swapget(mp->swap, p, 4);
- else
- s = (char*)cs;
- break;
- }
- }
- *e = s;
- }
- else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
- (*mp->disc->errorf)(mp, mp->disc, 2, "%s in indirect expression", *e);
- return n;
- }
- /*
- * emit regex error message
- */
- static void
- regmessage(Magic_t* mp, regex_t* re, int code)
- {
- char buf[128];
- if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
- {
- regerror(code, re, buf, sizeof(buf));
- (*mp->disc->errorf)(mp, mp->disc, 3, "regex: %s", buf);
- }
- }
- /*
- * decompose vcodex(3) method composition
- */
- static char*
- vcdecomp(char* b, char* e, unsigned char* m, unsigned char* x)
- {
- unsigned char* map;
- const char* o;
- int c;
- int n;
- int i;
- int a;
- map = CCMAP(CC_ASCII, CC_NATIVE);
- a = 0;
- i = 1;
- for (;;)
- {
- if (i)
- i = 0;
- else
- *b++ = '^';
- if (m < (x - 1) && !*(m + 1))
- {
- /*
- * obsolete indices
- */
- if (!a)
- {
- a = 1;
- o = "old, ";
- while (b < e && (c = *o++))
- *b++ = c;
- }
- switch (*m)
- {
- case 0: o = "delta"; break;
- case 1: o = "huffman"; break;
- case 2: o = "huffgroup"; break;
- case 3: o = "arith"; break;
- case 4: o = "bwt"; break;
- case 5: o = "rle"; break;
- case 6: o = "mtf"; break;
- case 7: o = "transpose"; break;
- case 8: o = "table"; break;
- case 9: o = "huffpart"; break;
- case 50: o = "map"; break;
- case 100: o = "recfm"; break;
- case 101: o = "ss7"; break;
- default: o = "UNKNOWN"; break;
- }
- m += 2;
- while (b < e && (c = *o++))
- *b++ = c;
- }
- else
- while (b < e && m < x && (c = *m++))
- {
- if (map)
- c = map[c];
- *b++ = c;
- }
- if (b >= e)
- break;
- n = 0;
- while (m < x)
- {
- n = (n<<7) | (*m & 0x7f);
- if (!(*m++ & 0x80))
- break;
- }
- if (n >= (x - m))
- break;
- m += n;
- }
- return b;
- }
- /*
- * check for magic table match in buf
- */
- static char*
- ckmagic(register Magic_t* mp, const char* file, char* buf, struct stat* st, unsigned long off)
- {
- register Entry_t* ep;
- register char* p;
- register char* b;
- register int level = 0;
- int call = -1;
- int c;
- char* q;
- char* t;
- char* base = 0;
- unsigned long num;
- unsigned long mask;
- regmatch_t matches[10];
- mp->swap = 0;
- b = mp->msg[0] = buf;
- mp->mime = mp->cap[0] = 0;
- mp->keep[0] = 0;
- for (ep = mp->magic; ep; ep = ep->next)
- {
- fun:
- if (ep->nest == '{')
- {
- if (++level >= MAXNEST)
- {
- call = -1;
- level = 0;
- mp->keep[0] = 0;
- b = mp->msg[0];
- mp->mime = mp->cap[0];
- continue;
- }
- mp->keep[level] = mp->keep[level - 1] != 0;
- mp->msg[level] = b;
- mp->cap[level] = mp->mime;
- }
- switch (ep->cont)
- {
- case '#':
- if (mp->keep[level] && b > buf)
- {
- *b = 0;
- return buf;
- }
- mp->swap = 0;
- b = mp->msg[0] = buf;
- mp->mime = mp->cap[0] = 0;
- if (ep->type == ' ')
- continue;
- break;
- case '$':
- if (mp->keep[level] && call < (MAXNEST - 1))
- {
- mp->ret[++call] = ep;
- ep = ep->value.lab;
- goto fun;
- }
- continue;
- case ':':
- ep = mp->ret[call--];
- if (ep->op == 'l')
- goto fun;
- continue;
- case '|':
- if (mp->keep[level] > 1)
- goto checknest;
- /*FALLTHROUGH*/
- default:
- if (!mp->keep[level])
- {
- b = mp->msg[level];
- mp->mime = mp->cap[level];
- goto checknest;
- }
- break;
- }
- p = "";
- num = 0;
- if (!ep->expr)
- num = ep->offset + off;
- else
- switch (ep->offset)
- {
- case 0:
- num = strexpr(ep->expr, NiL, indirect, mp) + off;
- break;
- case INFO_atime:
- num = st->st_atime;
- ep->type = 'D';
- break;
- case INFO_blocks:
- num = iblocks(st);
- ep->type = 'N';
- break;
- case INFO_ctime:
- num = st->st_ctime;
- ep->type = 'D';
- break;
- case INFO_fstype:
- p = fmtfs(st);
- ep->type = toupper(ep->type);
- break;
- case INFO_gid:
- if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
- {
- p = fmtgid(st->st_gid);
- ep->type = toupper(ep->type);
- }
- else
- {
- num = st->st_gid;
- ep->type = 'N';
- }
- break;
- case INFO_mode:
- if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
- {
- p = fmtmode(st->st_mode, 0);
- ep->type = toupper(ep->type);
- }
- else
- {
- num = modex(st->st_mode);
- ep->type = 'N';
- }
- break;
- case INFO_mtime:
- num = st->st_ctime;
- ep->type = 'D';
- break;
- case INFO_name:
- if (!base)
- {
- if (base = strrchr(file, '/'))
- base++;
- else
- base = (char*)file;
- }
- p = base;
- ep->type = toupper(ep->type);
- break;
- case INFO_nlink:
- num = st->st_nlink;
- ep->type = 'N';
- break;
- case INFO_size:
- num = st->st_size;
- ep->type = 'N';
- break;
- case INFO_uid:
- if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
- {
- p = fmtuid(st->st_uid);
- ep->type = toupper(ep->type);
- }
- else
- {
- num = st->st_uid;
- ep->type = 'N';
- }
- break;
- }
- switch (ep->type)
- {
- case 'b':
- if (!(p = getdata(mp, num, 1)))
- goto next;
- num = *(unsigned char*)p;
- break;
- case 'h':
- if (!(p = getdata(mp, num, 2)))
- goto next;
- num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 2);
- break;
- case 'd':
- case 'l':
- case 'v':
- if (!(p = getdata(mp, num, 4)))
- goto next;
- num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 4);
- break;
- case 'q':
- if (!(p = getdata(mp, num, 8)))
- goto next;
- num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 8);
- break;
- case 'e':
- if (!(p = getdata(mp, num, 0)))
- goto next;
- /*FALLTHROUGH*/
- case 'E':
- if (!ep->value.sub)
- goto next;
- if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches)))
- {
- c = mp->fbsz;
- if (c >= sizeof(mp->nbuf))
- c = sizeof(mp->nbuf) - 1;
- p = (char*)memcpy(mp->nbuf, p, c);
- p[c] = 0;
- ccmapstr(mp->x2n, p, c);
- if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches)))
- {
- if (c != REG_NOMATCH)
- regmessage(mp, ep->value.sub, c);
- goto next;
- }
- }
- p = ep->value.sub->re_sub->re_buf;
- q = T(ep->desc);
- t = *q ? q : p;
- if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *t && *t != ',' && *t != '.' && *t != '\b')
- *b++ = ' ';
- b += sfsprintf(b, PATH_MAX - (b - buf), *q ? q : "%s", p + (*p == '\b'));
- if (ep->mime)
- mp->mime = ep->mime;
- goto checknest;
- case 's':
- if (!(p = getdata(mp, num, ep->mask)))
- goto next;
- goto checkstr;
- case 'm':
- if (!(p = getdata(mp, num, 0)))
- goto next;
- /*FALLTHROUGH*/
- case 'M':
- case 'S':
- checkstr:
- for (;;)
- {
- if (*ep->value.str == '*' && !*(ep->value.str + 1) && isprint(*p))
- break;
- if ((ep->type == 'm' || ep->type == 'M') ? strmatch(p, ep->value.str) : !memcmp(p, ep->value.str, ep->mask))
- break;
- if (p == mp->nbuf || ep->mask >= sizeof(mp->nbuf))
- goto next;
- p = (char*)memcpy(mp->nbuf, p, ep->mask);
- p[ep->mask] = 0;
- ccmapstr(mp->x2n, p, ep->mask);
- }
- q = T(ep->desc);
- if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b')
- *b++ = ' ';
- for (t = p; (c = *t) >= 0 && c <= 0177 && isprint(c) && c != '\n'; t++);
- *t = 0;
- b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), p);
- *t = c;
- if (ep->mime)
- mp->mime = ep->mime;
- goto checknest;
- }
- if (mask = ep->mask)
- num &= mask;
- switch (ep->op)
- {
- case '=':
- case '@':
- if (num == ep->value.num)
- break;
- if (ep->cont != '#')
- goto next;
- if (!mask)
- mask = ~mask;
- if (ep->type == 'h')
- {
- if ((num = swapget(mp->swap = 1, p, 2) & mask) == ep->value.num)
- {
- if (!(mp->swap & (mp->swap + 1)))
- mp->swap = 7;
- goto swapped;
- }
- }
- else if (ep->type == 'l')
- {
- for (c = 1; c < 4; c++)
- if ((num = swapget(mp->swap = c, p, 4) & mask) == ep->value.num)
- {
- if (!(mp->swap & (mp->swap + 1)))
- mp->swap = 7;
- goto swapped;
- }
- }
- else if (ep->type == 'q')
- {
- for (c = 1; c < 8; c++)
- if ((num = swapget(mp->swap = c, p, 8) & mask) == ep->value.num)
- goto swapped;
- }
- goto next;
- case '!':
- if (num != ep->value.num)
- break;
- goto next;
- case '^':
- if (num ^ ep->value.num)
- break;
- goto next;
- case '>':
- if (num > ep->value.num)
- break;
- goto next;
- case '<':
- if (num < ep->value.num)
- break;
- goto next;
- case 'l':
- if (num > 0 && mp->keep[level] && call < (MAXNEST - 1))
- {
- if (!ep->value.loop->count)
- {
- ep->value.loop->count = num;
- ep->value.loop->offset = off;
- off = ep->value.loop->start;
- }
- else if (!--ep->value.loop->count)
- {
- off = ep->value.loop->offset;
- goto next;
- }
- else
- off += ep->value.loop->size;
- mp->ret[++call] = ep;
- ep = ep->value.loop->lab;
- goto fun;
- }
- goto next;
- case 'm':
- c = mp->swap;
- t = ckmagic(mp, file, b + (b > buf), st, num);
- mp->swap = c;
- if (!t)
- goto next;
- if (b > buf)
- *b = ' ';
- b += strlen(b);
- break;
- case 'r':
- #if _UWIN
- {
- char* e;
- Sfio_t* rp;
- Sfio_t* gp;
- if (!(t = strrchr(file, '.')))
- goto next;
- sfprintf(mp->tmp, "/reg/classes_root/%s", t);
- if (!(t = sfstruse(mp->tmp)) || !(rp = sfopen(NiL, t, "r")))
- goto next;
- *ep->desc = 0;
- *ep->mime = 0;
- gp = 0;
- while (t = sfgetr(rp, '\n', 1))
- {
- if (strneq(t, "Content Type=", 13))
- {
- ep->mime = vmnewof(mp->vm, ep->mime, char, sfvalue(rp), 0);
- strcpy(ep->mime, t + 13);
- if (gp)
- break;
- }
- else
- {
- sfprintf(mp->tmp, "/reg/classes_root/%s", t);
- if ((e = sfstruse(mp->tmp)) && (gp = sfopen(NiL, e, "r")))
- {
- ep->desc = vmnewof(mp->vm, ep->desc, char, strlen(t), 1);
- strcpy(ep->desc, t);
- if (*ep->mime)
- break;
- }
- }
- }
- sfclose(rp);
- if (!gp)
- goto next;
- if (!*ep->mime)
- {
- t = T(ep->desc);
- if (!strncasecmp(t, "microsoft", 9))
- t += 9;
- while (isspace(*t))
- t++;
- e = "application/x-ms-";
- ep->mime = vmnewof(mp->vm, ep->mime, char, strlen(t), strlen(e));
- e = strcopy(ep->mime, e);
- while ((c = *t++) && c != '.' && c != ' ')
- *e++ = isupper(c) ? tolower(c) : c;
- *e = 0;
- }
- while (t = sfgetr(gp, '\n', 1))
- if (*t && !streq(t, "\"\""))
- {
- ep->desc = vmnewof(mp->vm, ep->desc, char, sfvalue(gp), 0);
- strcpy(ep->desc, t);
- break;
- }
- sfclose(gp);
- if (!*ep->desc)
- goto next;
- if (!t)
- for (t = T(ep->desc); *t; t++)
- if (*t == '.')
- *t = ' ';
- if (!mp->keep[level])
- mp->keep[level] = 2;
- mp->mime = ep->mime;
- break;
- }
- #else
- if (ep->cont == '#' && !mp->keep[level])
- mp->keep[level] = 1;
- goto next;
- #endif
- case 'v':
- if (!(p = getdata(mp, num, 4)))
- goto next;
- c = 0;
- do
- {
- num++;
- c = (c<<7) | (*p & 0x7f);
- } while (*p++ & 0x80);
- if (!(p = getdata(mp, num, c)))
- goto next;
- if (mp->keep[level]++ && b > buf && *(b - 1) != ' ')
- {
- *b++ = ',';
- *b++ = ' ';
- }
- b = vcdecomp(b, buf + PATH_MAX, (unsigned char*)p, (unsigned char*)p + c);
- goto checknest;
- }
- swapped:
- q = T(ep->desc);
- if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b')
- *b++ = ' ';
- if (ep->type == 'd' || ep->type == 'D')
- b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), fmttime("%?%l", (time_t)num));
- else if (ep->type == 'v')
- b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), fmtversion(num));
- else
- b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), num);
- if (ep->mime && *ep->mime)
- mp->mime = ep->mime;
- checknest:
- if (ep->nest == '}')
- {
- if (!mp->keep[level])
- {
- b = mp->msg[level];
- mp->mime = mp->cap[level];
- }
- else if (level > 0)
- mp->keep[level - 1] = mp->keep[level];
- if (--level < 0)
- {
- level = 0;
- mp->keep[0] = 0;
- }
- }
- continue;
- next:
- if (ep->cont == '&')
- mp->keep[level] = 0;
- goto checknest;
- }
- if (mp->keep[level] && b > buf)
- {
- *b = 0;
- return buf;
- }
- return 0;
- }
- /*
- * check english language stats
- */
- static int
- ckenglish(register Magic_t* mp, int pun, int badpun)
- {
- register char* s;
- register int vowl = 0;
- register int freq = 0;
- register int rare = 0;
- if (5 * badpun > pun)
- return 0;
- if (2 * mp->count[';'] > mp->count['E'] + mp->count['e'])
- return 0;
- if ((mp->count['>'] + mp->count['<'] + mp->count['/']) > mp->count['E'] + mp->count['e'])
- return 0;
- for (s = "aeiou"; *s; s++)
- vowl += mp->count[toupper(*s)] + mp->count[*s];
- for (s = "etaion"; *s; s++)
- freq += mp->count[toupper(*s)] + mp->count[*s];
- for (s = "vjkqxz"; *s; s++)
- rare += mp->count[toupper(*s)] + mp->count[*s];
- return 5 * vowl >= mp->fbsz - mp->count[' '] && freq >= 10 * rare;
- }
- /*
- * check programming language stats
- *
- * classifies the data in mp->fbuf by character code set, "#!" line,
- * file name suffix and identifier/punctuation statistics
- *
- * returns the description string -- a literal or translated string,
- * mp->sbuf, or text formatted into buf -- and sets mp->mime
- */
- static char*
- cklang(register Magic_t* mp, const char* file, char* buf, struct stat* st)
- {
- register int c;
- register unsigned char* b;
- register unsigned char* e;
- register int q;
- register char* s;
- char* t;
- char* base;
- char* suff;
- char* t1;
- char* t2;
- char* t3;
- int n;
- int badpun;
- int code;
- int pun;
- Cctype_t flags;
- Info_t* ip;
- b = (unsigned char*)mp->fbuf;
- e = b + mp->fbsz;
- memzero(mp->count, sizeof(mp->count)); /* reset the per file statistics */
- memzero(mp->multi, sizeof(mp->multi));
- memzero(mp->identifier, sizeof(mp->identifier));
- /*
- * check character coding
- *
- * the cctype[] bits of every byte are or'd together, then the
- * CC_BIT wide group of each code set is examined with CC_text
- * flipped and the code set with the numerically smallest group
- * is chosen: q is that code set and flags ends up as its group
- */
- flags = 0;
- while (b < e)
- flags |= mp->cctype[*b++];
- b = (unsigned char*)mp->fbuf;
- code = 0;
- q = CC_ASCII;
- n = CC_MASK;
- for (c = 0; c < CC_MAPS; c++)
- {
- flags ^= CC_text;
- if ((flags & CC_MASK) < n)
- {
- n = flags & CC_MASK;
- q = c;
- }
- flags >>= CC_BIT;
- }
- flags = n;
- if (!(flags & (CC_binary|CC_notext))) /* text in code set q */
- {
- if (q != CC_NATIVE)
- {
- code = q; /* remembered for the qualifier added at qualify: */
- ccmaps(mp->fbuf, mp->fbsz, q, CC_NATIVE); /* presumably converts fbuf to the native code set in place -- confirm */
- }
- if (b[0] == '#' && b[1] == '!') /* interpreter line */
- {
- for (b += 2; b < e && isspace(*b); b++);
- for (s = (char*)b; b < e && isprint(*b); b++);
- c = *b; /* saved so that the terminator written next can be undone */
- *b = 0;
- if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) || match(s, "/*bin*/*") || !access(s, F_OK))
- {
- if (t = strrchr(s, '/'))
- s = t + 1;
- for (t = s; *t; t++)
- if (isspace(*t))
- {
- *t = 0;
- break;
- }
- sfsprintf(mp->mbuf, sizeof(mp->mbuf), "application/x-%s", *s ? s : "sh");
- mp->mime = mp->mbuf;
- if (match(s, "*sh"))
- {
- t1 = T("command");
- if (streq(s, "sh"))
- *s = 0;
- else
- {
- *b++ = ' '; /* NOTE(review): b is the end of the whole line, so if s was cut at an argument above this space does not land after the name -- confirm */
- *b = 0;
- }
- }
- else
- {
- t1 = T("interpreter");
- *b++ = ' ';
- *b = 0;
- }
- sfsprintf(mp->sbuf, sizeof(mp->sbuf), T("%s%s script"), s, t1);
- s = mp->sbuf;
- goto qualify;
- }
- *b = c;
- b = (unsigned char*)mp->fbuf;
- }
- badpun = 0;
- pun = 0;
- q = 0; /* pending closing delimiter of a quote or comment, 0 if none */
- s = 0; /* start of the identifier being scanned, 0 if none */
- t = 0;
- while (b < e) /* gather byte, delimiter, identifier and punctuation counts */
- {
- c = *b++;
- mp->count[c]++;
- if (c == q && (q != '*' || *b == '/' && b++))
- {
- mp->multi[q]++; /* a quote or comment was closed */
- q = 0;
- }
- else if (c == '\\')
- {
- s = 0;
- b++; /* the escaped byte is skipped and not counted */
- }
- else if (!q)
- {
- if (isalpha(c) || c == '_')
- {
- if (!s)
- s = (char*)b - 1;
- }
- else if (!isdigit(c))
- {
- if (s) /* c ends the identifier that starts at s */
- {
- if (s > mp->fbuf)
- switch (*(s - 1))
- {
- case ':':
- if (*b == ':')
- mp->multi[':']++;
- break;
- case '.':
- if (((char*)b - s) == 3 && (s == (mp->fbuf + 1) || *(s - 2) == '\n'))
- mp->multi['.']++; /* two letter word after '.' at the start of a line */
- break;
- case '\n':
- case '\\':
- if (*b == '{')
- t = (char*)b + 1;
- break;
- case '{':
- if (s == t && *b == '}')
- mp->multi['X']++;
- break;
- }
- if (!mp->idtab) /* build the identifier dictionary on first use */
- {
- if (mp->idtab = dtnew(mp->vm, &mp->dtdisc, Dthash))
- for (q = 0; q < elementsof(dict); q++)
- dtinsert(mp->idtab, &dict[q]);
- else if (mp->disc->errorf)
- (*mp->disc->errorf)(mp, mp->disc, 3, "out of space");
- q = 0; /* q was borrowed as the loop index */
- }
- if (mp->idtab)
- {
- *(b - 1) = 0; /* temporarily terminate the identifier for the lookup */
- if (ip = (Info_t*)dtmatch(mp->idtab, s))
- mp->identifier[ip->value]++;
- *(b - 1) = c;
- }
- s = 0;
- }
- switch (c)
- {
- case '\t':
- if (b == (unsigned char*)(mp->fbuf + 1) || *(b - 2) == '\n')
- mp->multi['\t']++; /* tab at the start of a line */
- break;
- case '"':
- case '\'':
- q = c;
- break;
- case '/':
- if (*b == '*')
- q = *b++;
- else if (*b == '/')
- q = '\n';
- break;
- case '$':
- if (*b == '(' && *(b + 1) != ' ')
- mp->multi['$']++;
- break;
- case '{':
- case '}':
- case '[':
- case ']':
- case '(':
- mp->multi[c]++;
- break;
- case ')':
- mp->multi[c]++;
- goto punctuation;
- case ':':
- if (*b == ':' && isspace(*(b + 1)) && b > (unsigned char*)(mp->fbuf + 1) && isspace(*(b - 2)))
- mp->multi[':']++;
- goto punctuation;
- case '.':
- case ',':
- case '%':
- case ';':
- case '?':
- punctuation:
- pun++;
- if (*b != ' ' && *b != '\n')
- badpun++; /* punctuation not followed by space or newline */
- break;
- }
- }
- }
- }
- }
- else
- while (b < e)
- mp->count[*b++]++; /* not text: byte frequencies only */
- base = (t1 = strrchr(file, '/')) ? t1 + 1 : (char*)file;
- suff = (t1 = strrchr(base, '.')) ? t1 + 1 : "";
- if (!flags) /* only text characters were seen: try the name, then the statistics */
- {
- if (match(suff, "*sh|bat|cmd"))
- goto id_sh;
- if (match(base, "*@(mkfile)"))
- goto id_mk;
- if (match(base, "*@(makefile|.mk)"))
- goto id_make;
- if (match(base, "*@(mamfile|.mam)"))
- goto id_mam;
- if (match(suff, "[cly]?(pp|xx|++)|cc|ll|yy"))
- goto id_c;
- if (match(suff, "f"))
- goto id_fortran;
- if (match(suff, "htm+(l)"))
- goto id_html;
- if (match(suff, "cpy"))
- goto id_copybook;
- if (match(suff, "cob|cbl|cb2"))
- goto id_cobol;
- if (match(suff, "pl[1i]"))
- goto id_pl1;
- if (match(suff, "tex"))
- goto id_tex;
- if (match(suff, "asm|s"))
- goto id_asm;
- if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) && (!suff || suff != strchr(suff, '.')))
- {
- id_sh:
- s = T("command script");
- mp->mime = "application/sh";
- goto qualify;
- }
- if (strmatch(mp->fbuf, "From * [0-9][0-9]:[0-9][0-9]:[0-9][0-9] *"))
- {
- s = T("mail message");
- mp->mime = "message/rfc822";
- goto qualify;
- }
- if (match(base, "*@(mkfile)"))
- {
- id_mk:
- s = "mkfile";
- mp->mime = "application/mk";
- goto qualify;
- }
- if (match(base, "*@(makefile|.mk)") || mp->multi['\t'] >= mp->count[':'] && (mp->multi['$'] > 0 || mp->multi[':'] > 0))
- {
- id_make:
- s = "makefile";
- mp->mime = "application/make";
- goto qualify;
- }
- if (mp->multi['.'] >= 3)
- {
- s = T("nroff input");
- mp->mime = "application/x-troff";
- goto qualify;
- }
- if (mp->multi['X'] >= 3)
- {
- s = T("TeX input");
- mp->mime = "application/x-tex";
- goto qualify;
- }
- if (mp->fbsz < SF_BUFSIZE && /* brackets must balance exactly unless the buffer was filled */
- (mp->multi['('] == mp->multi[')'] &&
- mp->multi['{'] == mp->multi['}'] &&
- mp->multi['['] == mp->multi[']']) ||
- mp->fbsz >= SF_BUFSIZE &&
- (mp->multi['('] >= mp->multi[')'] &&
- mp->multi['{'] >= mp->multi['}'] &&
- mp->multi['['] >= mp->multi[']']))
- {
- c = mp->identifier[ID_INCL1];
- if (c >= 2 && mp->identifier[ID_INCL2] >= c && mp->identifier[ID_INCL3] >= c && mp->count['.'] >= c ||
- mp->identifier[ID_C] >= 5 && mp->count[';'] >= 5 ||
- mp->count['='] >= 20 && mp->count[';'] >= 20)
- {
- id_c:
- t1 = "";
- t2 = "c ";
- t3 = T("program");
- switch (*suff) /* the first suffix character selects c, lex or yacc */
- {
- case 'c':
- case 'C':
- mp->mime = "application/x-cc";
- break;
- case 'l':
- case 'L':
- t1 = "lex ";
- mp->mime = "application/x-lex";
- break;
- default:
- t3 = T("header");
- if (mp->identifier[ID_YACC] < 5 || mp->count['%'] < 5)
- {
- mp->mime = "application/x-cc";
- break;
- }
- /*FALLTHROUGH*/
- case 'y':
- case 'Y':
- t1 = "yacc ";
- mp->mime = "application/x-yacc";
- break;
- }
- if (mp->identifier[ID_CPLUSPLUS] >= 3)
- {
- t2 = "c++ ";
- mp->mime = "application/x-c++";
- }
- sfsprintf(mp->sbuf, sizeof(mp->sbuf), "%s%s%s", t1, t2, t3);
- s = mp->sbuf;
- goto qualify;
- }
- }
- if (mp->identifier[ID_MAM1] >= 2 && mp->identifier[ID_MAM3] >= 2 &&
- (mp->fbsz < SF_BUFSIZE && mp->identifier[ID_MAM1] == mp->identifier[ID_MAM2] ||
- mp->fbsz >= SF_BUFSIZE && mp->identifier[ID_MAM1] >= mp->identifier[ID_MAM2]))
- {
- id_mam:
- s = T("mam program");
- mp->mime = "application/x-mam";
- goto qualify;
- }
- if (mp->identifier[ID_FORTRAN] >= 8)
- {
- id_fortran:
- s = T("fortran program");
- mp->mime = "application/x-fortran";
- goto qualify;
- }
- if (mp->identifier[ID_HTML] > 0 && mp->count['<'] >= 8 && (c = mp->count['<'] - mp->count['>']) >= -2 && c <= 2)
- {
- id_html:
- s = T("html input");
- mp->mime = "text/html";
- goto qualify;
- }
- if (mp->identifier[ID_COPYBOOK] > 0 && mp->identifier[ID_COBOL] == 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
- {
- id_copybook:
- s = T("cobol copybook");
- mp->mime = "application/x-cobol";
- goto qualify;
- }
- if (mp->identifier[ID_COBOL] > 0 && mp->identifier[ID_COPYBOOK] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
- {
- id_cobol:
- s = T("cobol program");
- mp->mime = "application/x-cobol";
- goto qualify;
- }
- if (mp->identifier[ID_PL1] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
- {
- id_pl1:
- s = T("pl1 program");
- mp->mime = "application/x-pl1";
- goto qualify;
- }
- if (mp->count['{'] >= 6 && (c = mp->count['{'] - mp->count['}']) >= -2 && c <= 2 && mp->count['\\'] >= mp->count['{'])
- {
- id_tex:
- s = T("TeX input");
- mp->mime = "text/tex";
- goto qualify;
- }
- if (mp->identifier[ID_ASM] >= 4)
- {
- id_asm:
- s = T("as program");
- mp->mime = "application/x-as";
- goto qualify;
- }
- if (ckenglish(mp, pun, badpun))
- {
- s = T("english text");
- mp->mime = "text/plain";
- goto qualify;
- }
- }
- else if (streq(base, "core"))
- {
- mp->mime = "x-system/core";
- return T("core dump");
- }
- if (flags & (CC_binary|CC_notext)) /* not plain text: see whether the bytes form utf-8 sequences */
- {
- b = (unsigned char*)mp->fbuf;
- e = b + mp->fbsz;
- n = 0; /* set once a multi-byte sequence has been seen */
- for (;;)
- {
- c = *b++;
- q = 0;
- while (c & 0x80) /* q = number of leading 1 bits in the byte */
- {
- c <<= 1;
- q++;
- }
- switch (q)
- {
- case 4: /* each case checks one trailing byte and falls through to the next */
- if (b < e && (*b++ & 0xc0) != 0x80)
- break;
- case 3:
- if (b < e && (*b++ & 0xc0) != 0x80)
- break;
- case 2:
- if (b < e && (*b++ & 0xc0) != 0x80)
- break;
- n = 1;
- case 0:
- if (b >= e)
- {
- if (n)
- {
- flags &= ~(CC_binary|CC_notext);
- flags |= CC_utf_8;
- }
- break;
- }
- continue;
- }
- break;
- }
- }
- if (flags & (CC_binary|CC_notext)) /* still binary: grade by the spread of the byte frequencies */
- {
- unsigned long d = 0;
- if ((q = mp->fbsz / UCHAR_MAX) >= 2)
- {
- /*
- * compression/encryption via standard deviation
- */
- for (c = 0; c < UCHAR_MAX; c++) /* NOTE(review): count[UCHAR_MAX] is not included -- confirm this is intended */
- {
- pun = mp->count[c] - q;
- d += pun * pun;
- }
- d /= mp->fbsz;
- }
- if (d <= 0)
- s = T("binary");
- else if (d < 4)
- s = T("encrypted");
- else if (d < 16)
- s = T("packed");
- else if (d < 64)
- s = T("compressed");
- else if (d < 256)
- s = T("delta");
- else
- s = T("data");
- mp->mime = "application/octet-stream";
- return s;
- }
- mp->mime = "text/plain";
- if (flags & CC_utf_8)
- s = (flags & CC_control) ? T("utf-8 text with control characters") : T("utf-8 text");
- else if (flags & CC_latin)
- s = (flags & CC_control) ? T("latin text with control characters") : T("latin text");
- else
- s = (flags & CC_control) ? T("text with control characters") : T("text");
- qualify:
- if (!flags && mp->count['\n'] >= mp->count['\r'] && mp->count['\n'] <= (mp->count['\r'] + 1) && mp->count['\r']) /* as many carriage returns as newlines, give or take one */
- {
- t = "dos ";
- mp->mime = "text/dos";
- }
- else
- t = "";
- if (code) /* the data was not in the native code set: name the code set */
- {
- if (code == CC_ASCII)
- sfsprintf(buf, PATH_MAX, "ascii %s%s", t, s);
- else
- {
- sfsprintf(buf, PATH_MAX, "ebcdic%d %s%s", code - 1, t, s);
- mp->mime = "text/ebcdic";
- }
- s = buf;
- }
- else if (*t)
- {
- sfsprintf(buf, PATH_MAX, "%s%s", t, s);
- s = buf;
- }
- return s;
- }
- /*
- * return the basic magic string for file,st in buf,size
- *
- * mp	magic session; mp->mime is set to the mime type of the result
- * file	path name, used to read symbolic link text and passed to the matchers
- * st	stat information for file
- * buf	scratch buffer that may hold the returned string
- * size	size of buf in bytes; every write to buf is bounded by it
- *
- * the return value is either a constant (possibly translated) string,
- * an error string from fmterror(), or buf
- * anything that is not handled as a special file is sampled into
- * mp->fbuf and handed to ckmagic() and, if that fails, cklang()
- * a mime type ending in %s is expanded into mp->fbuf using the leading
- * words of the description with each space converted to -
- */
- static char*
- type(register Magic_t* mp, const char* file, struct stat* st, char* buf, int size)
- {
- 	register char*	s;
- 	register char*	t;
- 	int		n;
- 	mp->mime = 0;
- 	if (!S_ISREG(st->st_mode))
- 	{
- 		if (S_ISDIR(st->st_mode))
- 		{
- 			mp->mime = "x-system/dir";
- 			return T("directory");
- 		}
- 		if (S_ISLNK(st->st_mode))
- 		{
- 			mp->mime = "x-system/lnk";
- 			/*
- 			 * the prefix is bounded by size rather than PATH_MAX and the
- 			 * translated text is passed as data, never as the format
- 			 */
- 			n = sfsprintf(buf, size, "%s", T("symbolic link to "));
- 			if (n < 0 || n >= size)
- 				return T("cannot read symbolic link text");
- 			s = buf + n;
- 			if (pathgetlink(file, s, size - n) < 0)
- 				return T("cannot read symbolic link text");
- 			return buf;
- 		}
- 		if (S_ISBLK(st->st_mode))
- 		{
- 			mp->mime = "x-system/blk";
- 			sfsprintf(buf, size, T("block special (%s)"), fmtdev(st));
- 			return buf;
- 		}
- 		if (S_ISCHR(st->st_mode))
- 		{
- 			mp->mime = "x-system/chr";
- 			sfsprintf(buf, size, T("character special (%s)"), fmtdev(st));
- 			return buf;
- 		}
- 		if (S_ISFIFO(st->st_mode))
- 		{
- 			mp->mime = "x-system/fifo";
- 			return "fifo";
- 		}
- #ifdef S_ISSOCK
- 		if (S_ISSOCK(st->st_mode))
- 		{
- 			mp->mime = "x-system/sock";
- 			return "socket";
- 		}
- #endif
- 	}
- 	if (!(mp->fbmx = st->st_size))
- 		s = T("empty");
- 	else if (!mp->fp)
- 		s = T("cannot read");
- 	else
- 	{
- 		/*
- 		 * one byte is reserved so the sample can be 0 terminated
- 		 */
- 		mp->fbsz = sfread(mp->fp, mp->fbuf, sizeof(mp->fbuf) - 1);
- 		if (mp->fbsz < 0)
- 			s = fmterror(errno);
- 		else if (mp->fbsz == 0)
- 			s = T("empty");
- 		else
- 		{
- 			mp->fbuf[mp->fbsz] = 0;
- 			mp->xoff = 0;
- 			mp->xbsz = 0;
- 			if (!(s = ckmagic(mp, file, buf, st, 0)))
- 				s = cklang(mp, file, buf, st);
- 		}
- 	}
- 	if (!mp->mime)
- 		mp->mime = "application/unknown";
- 	else if ((t = strchr(mp->mime, '%')) && *(t + 1) == 's' && !*(t + 2))
- 	{
- 		register char*	b;
- 		register char*	be;
- 		register char*	m;
- 		register char*	me;
- 		/*
- 		 * copy the mime prefix up to the trailing %s into mp->fbuf
- 		 */
- 		b = mp->mime;
- 		me = (m = mp->mime = mp->fbuf) + sizeof(mp->fbuf) - 1;
- 		while (m < me && b < t)
- 			*m++ = *b++;
- 		/*
- 		 * select the description words to append: scan space separated
- 		 * words until one ends with a comma or is followed by "data"
- 		 * or "file"; b..be is the selected span
- 		 */
- 		b = t = s;
- 		for (;;)
- 		{
- 			if (!(be = strchr(t, ' ')))
- 			{
- 				be = b + strlen(b);
- 				break;
- 			}
- 			/*
- 			 * be > s keeps the comma test from reading before the
- 			 * start of a description that begins with a space
- 			 */
- 			if ((be > s && *(be - 1) == ',') || strneq(be + 1, "data", 4) || strneq(be + 1, "file", 4))
- 				break;
- 			b = t;
- 			t = be + 1;
- 		}
- 		while (m < me && b < be)
- 			if ((*m++ = *b++) == ' ')
- 				*(m - 1) = '-';
- 		*m = 0;
- 	}
- 	return s;
- }
- /*
- * low level for magicload()
- *
- * read magic entries from fp one line at a time and append the
- * resulting Entry_t chain to the chain rooted at mp->magic
- *
- * mp	magic session; entries and strings are allocated from mp->vm
- * file	name assigned to error_info.file while loading
- * fp	stream to read; it is not closed here
- *
- * a regular entry line is split into an offset field, a type field,
- * a comparison field and an optional description that may be
- * followed by a tab and a mime type
- * empty lines and lines starting with # are skipped
- * a line starting with { or } adjusts the nesting level and
- * a character followed by { or ( defines or calls a function
- * named by a lower case letter
- *
- * diagnostics are passed to mp->disc->errorf when it is set
- * the return value is always 0
- *
- * NOTE(review): vmnewof() and vmstrdup() results are used without a
- * check for allocation failure -- confirm that this is acceptable
- */
- static int
- load(register Magic_t* mp, char* file, register Sfio_t* fp)
- {
- register Entry_t* ep; /* preallocated entry being filled in */
- register char* p;
- register char* p2;
- char* p3;
- char* next;
- int n;
- int lge; /* -1 for >=, 1 for <=, otherwise 0; added to a numeric operand */
- int lev; /* current { ... } nesting level */
- int ent; /* nesting level of the open function body, 0 if none */
- int old; /* old style > and >> nesting state */
- int cont; /* cont value for the next function definition or call entry */
- Info_t* ip;
- Entry_t* ret; /* [FUNCTION] entry waiting for its closing }, 0 if none */
- Entry_t* first; /* head of the chain built by this call */
- Entry_t* last = 0; /* most recently completed entry */
- Entry_t* fun['z' - 'a' + 1]; /* function bodies indexed by name - 'a' */
- memzero(fun, sizeof(fun));
- cont = '$';
- ent = 0;
- lev = 0;
- old = 0;
- ret = 0;
- error_info.file = file;
- error_info.line = 0;
- first = ep = vmnewof(mp->vm, 0, Entry_t, 1, 0);
- while (p = sfgetr(fp, '\n', 1)) /* one magic line per iteration */
- {
- error_info.line++;
- for (; isspace(*p); p++); /* skip leading space */
- /*
- * nesting
- */
- switch (*p)
- {
- case 0:
- case '#':
- cont = '#';
- continue;
- case '{':
- if (++lev < MAXNEST)
- ep->nest = *p;
- else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
- (*mp->disc->errorf)(mp, mp->disc, 1, "{ ... } operator nesting too deep -- %d max", MAXNEST);
- continue;
- case '}':
- if (!last || lev <= 0)
- {
- if (mp->disc->errorf)
- (*mp->disc->errorf)(mp, mp->disc, 2, "`%c': invalid nesting", *p);
- }
- else if (lev-- == ent) /* this } closes the open function body */
- {
- ent = 0;
- ep->cont = ':';
- ep->offset = ret->offset;
- ep->nest = ' ';
- ep->type = ' ';
- ep->op = ' ';
- ep->desc = "[RETURN]";
- last = ep;
- ep = ret->next = vmnewof(mp->vm, 0, Entry_t, 1, 0); /* the [FUNCTION] entry now links past the body; the body was recorded in fun[] and value.lab */
- ret = 0;
- }
- else
- last->nest = *p;
- continue;
- default:
- if (*(p + 1) == '{' || *(p + 1) == '(' && *p != '+' && *p != '>' && *p != '&' && *p != '|') /* name{ defines and name( calls a function */
- {
- n = *p++;
- if (n >= 'a' && n <= 'z')
- n -= 'a';
- else
- {
- if (mp->disc->errorf)
- (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n);
- n = 0; /* an invalid name falls back to index 0 */
- }
- if (ret && mp->disc->errorf)
- (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a');
- if (*p == '{')
- {
- ent = ++lev;
- ret = ep;
- ep->desc = "[FUNCTION]";
- }
- else
- {
- if (*(p + 1) != ')' && mp->disc->errorf)
- (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function call argument list", n + 'a');
- ep->desc = "[CALL]";
- }
- ep->cont = cont;
- ep->offset = n;
- ep->nest = ' ';
- ep->type = ' ';
- ep->op = ' ';
- last = ep;
- ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
- if (ret)
- fun[n] = last->value.lab = ep; /* the body starts at the next entry */
- else if (!(last->value.lab = fun[n]) && mp->disc->errorf)
- (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a');
- continue;
- }
- if (!ep->nest)
- ep->nest = (lev > 0 && lev != ent) ? ('0' + lev - !!ent) : ' '; /* nesting depth digit, not counting the function body level */
- break;
- }
- /*
- * continuation
- */
- cont = '$';
- switch (*p)
- {
- case '>':
- old = 1;
- if (*(p + 1) == *p)
- {
- /*
- * old style nesting push
- */
- p++;
- old = 2;
- if (!lev && last)
- {
- lev = 1;
- last->nest = '{';
- if (last->cont == '>')
- last->cont = '&';
- ep->nest = '1';
- }
- }
- /*FALLTHROUGH*/
- case '+':
- case '&':
- case '|':
- ep->cont = *p++;
- break;
- default:
- if ((mp->flags & MAGIC_VERBOSE) && !isalpha(*p) && mp->disc->errorf)
- (*mp->disc->errorf)(mp, mp->disc, 1, "`%c': invalid line continuation operator", *p);
- /*FALLTHROUGH*/
- case '*':
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- ep->cont = (lev > 0) ? '&' : '#';
- break;
- }
- switch (old)
- {
- case 1:
- old = 0;
- if (lev)
- {
- /*
- * old style nesting pop
- */
- lev = 0;
- if (last)
- last->nest = '}';
- ep->nest = ' ';
- if (ep->cont == '&')
- ep->cont = '#';
- }
- break;
- case 2:
- old = 1;
- break;
- }
- if (isdigit(*p))
- {
- /*
- * absolute offset
- */
- ep->offset = strton(p, &next, NiL, 0);
- p2 = next;
- }
- else
- {
- for (p2 = p; *p2 && !isspace(*p2); p2++);
- if (!*p2)
- {
- if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
- (*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p);
- continue; /* the line is dropped and ep is reused for the next line */
- }
- /*
- * offset expression
- */
- *p2++ = 0;
- ep->expr = vmstrdup(mp->vm, p);
- if (isalpha(*p))
- ep->offset = (ip = (Info_t*)dtmatch(mp->infotab, p)) ? ip->value : 0; /* identifier looked up in the info dictionary, 0 if unknown */
- else if (*p == '(' && ep->cont == '>')
- {
- /*
- * convert old style indirection to @
- */
- p = ep->expr + 1;
- for (;;)
- {
- switch (*p++)
- {
- case 0:
- case '@':
- case '(':
- break;
- case ')':
- break;
- default:
- continue;
- }
- break;
- }
- if (*--p == ')') /* only a simple (...) with no inner @ or ( is converted */
- {
- *p = 0;
- *ep->expr = '@';
- }
- }
- }
- for (; isspace(*p2); p2++);
- for (p = p2; *p2 && !isspace(*p2); p2++); /* p .. p2 is the type field */
- if (!*p2)
- {
- if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
- (*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p);
- continue;
- }
- *p2++ = 0;
- /*
- * type
- */
- if ((*p == 'b' || *p == 'l') && *(p + 1) == 'e')
- {
- ep->swap = ~(*p == 'l' ? 7 : 0); /* le prefix gives ~7 and be prefix gives ~0 */
- p += 2;
- }
- if (*p == 's')
- {
- if (*(p + 1) == 'h')
- ep->type = 'h';
- else
- ep->type = 's';
- }
- else if (*p == 'a')
- ep->type = 's';
- else
- ep->type = *p;
- if (p = strchr(p, '&'))
- {
- /*
- * old style mask
- */
- ep->mask = strton(++p, NiL, NiL, 0);
- }
- for (; isspace(*p2); p2++);
- if (ep->mask)
- *--p2 = '='; /* an old style mask gets an explicit = placed in front of the operand */
- /*
- * comparison operation
- */
- p = p2;
- if (p2 = strchr(p, '\t'))
- *p2++ = 0;
- else
- {
- int qe = 0; /* closing delimiter being waited for, 0 if none */
- int qn = 0; /* open count for the current bracket kind */
- /*
- * assume balanced {}[]()\\""'' field
- */
- for (p2 = p;;)
- {
- switch (n = *p2++)
- {
- case 0:
- break;
- case '{':
- if (!qe)
- qe = '}';
- if (qe == '}')
- qn++;
- continue;
- case '(':
- if (!qe)
- qe = ')';
- if (qe == ')')
- qn++;
- continue;
- case '[':
- if (!qe)
- qe = ']';
- if (qe == ']')
- qn++;
- continue;
- case '}':
- case ')':
- case ']':
- if (qe == n && qn > 0)
- qn--;
- continue;
- case '"':
- case '\'':
- if (!qe)
- qe = n;
- else if (qe == n)
- qe = 0;
- continue;
- case '\\':
- if (*p2)
- p2++;
- continue;
- default:
- if (!qe && isspace(n))
- break;
- continue;
- }
- if (n)
- *(p2 - 1) = 0;
- else
- p2--; /* end of line: p2 is left on the terminating 0 */
- break;
- }
- }
- lge = 0;
- if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
- ep->op = '='; /* the whole field is the operand for these types */
- else
- {
- if (*p == '&')
- {
- ep->mask = strton(++p, &next, NiL, 0);
- p = next;
- }
- switch (*p)
- {
- case '=':
- case '>':
- case '<':
- case '*':
- ep->op = *p++;
- if (*p == '=')
- {
- p++;
- switch (ep->op)
- {
- case '>':
- lge = -1;
- break;
- case '<':
- lge = 1;
- break;
- }
- }
- break;
- case '!':
- case '@':
- ep->op = *p++;
- if (*p == '=')
- p++;
- break;
- case 'x':
- p++;
- ep->op = '*';
- break;
- default:
- ep->op = '=';
- if (ep->mask)
- ep->value.num = ep->mask;
- break;
- }
- }
- if (ep->op != '*' && !ep->value.num) /* an operand still has to be parsed */
- {
- if (ep->type == 'e')
- {
- if (ep->value.sub = vmnewof(mp->vm, 0, regex_t, 1, 0))
- {
- ep->value.sub->re_disc = &mp->redisc;
- if (!(n = regcomp(ep->value.sub, p, REG_DELIMITED|REG_LENIENT|REG_NULL|REG_DISCIPLINE)))
- {
- p += ep->value.sub->re_npat;
- if (!(n = regsubcomp(ep->value.sub, p, NiL, 0, 0)))
- p += ep->value.sub->re_npat;
- }
- if (n)
- {
- regmessage(mp, ep->value.sub, n);
- ep->value.sub = 0;
- }
- else if (*p && mp->disc->errorf)
- (*mp->disc->errorf)(mp, mp->disc, 1, "invalid characters after substitution: %s", p);
- }
- }
- else if (ep->type == 'm')
- {
- ep->mask = stresc(p) + 1;
- ep->value.str = vmnewof(mp->vm, 0, char, ep->mask + 1, 0);
- memcpy(ep->value.str, p, ep->mask);
- if ((!ep->expr || !ep->offset) && !strmatch(ep->value.str, "\\!\\(*\\)"))
- ep->value.str[ep->mask - 1] = '*'; /* a * is appended to the pattern */
- }
- else if (ep->type == 's')
- {
- ep->mask = stresc(p); /* for strings the mask field holds the operand length */
- ep->value.str = vmnewof(mp->vm, 0, char, ep->mask, 0);
- memcpy(ep->value.str, p, ep->mask);
- }
- else if (*p == '\'')
- {
- stresc(p);
- ep->value.num = *(unsigned char*)(p + 1) + lge;
- }
- else if (strmatch(p, "+([a-z])\\(*\\)")) /* name(...) builtin function operand */
- {
- char* t;
- t = p;
- ep->type = 'V';
- ep->op = *p;
- while (*p && *p++ != '(');
- switch (ep->op)
- {
- case 'l':
- n = *p++;
- if (n < 'a' || n > 'z')
- {
- if (mp->disc->errorf)
- (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n);
- }
- else if (!fun[n -= 'a'])
- {
- if (mp->disc->errorf)
- (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a');
- }
- else
- {
- ep->value.loop = vmnewof(mp->vm, 0, Loop_t, 1, 0);
- ep->value.loop->lab = fun[n];
- while (*p && *p++ != ',');
- ep->value.loop->start = strton(p, &t, NiL, 0);
- while (*t && *t++ != ',');
- ep->value.loop->size = strton(t, &t, NiL, 0);
- }
- break;
- case 'm':
- case 'r':
- ep->desc = vmnewof(mp->vm, 0, char, 32, 0);
- ep->mime = vmnewof(mp->vm, 0, char, 32, 0);
- break;
- case 'v':
- break;
- default:
- if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
- (*mp->disc->errorf)(mp, mp->disc, 1, "%-.*s: unknown function", p - t, t);
- break;
- }
- }
- else
- {
- ep->value.num = strton(p, NiL, NiL, 0) + lge;
- if (ep->op == '@')
- ep->value.num = swapget(0, (char*)&ep->value.num, sizeof(ep->value.num));
- }
- }
- /*
- * file description
- */
- if (p2)
- {
- for (; isspace(*p2); p2++);
- if (p = strchr(p2, '\t'))
- {
- /*
- * check for message catalog index
- */
- *p++ = 0;
- if (isalpha(*p2))
- {
- for (p3 = p2; isalnum(*p3); p3++);
- if (*p3++ == ':')
- {
- for (; isdigit(*p3); p3++);
- if (!*p3) /* the field is name:digits, so the description is the next field */
- {
- for (p2 = p; isspace(*p2); p2++);
- if (p = strchr(p2, '\t'))
- *p++ = 0;
- }
- }
- }
- }
- stresc(p2);
- ep->desc = vmstrdup(mp->vm, p2);
- if (p) /* text after the tab is the mime type */
- {
- for (; isspace(*p); p++);
- if (*p)
- ep->mime = vmstrdup(mp->vm, p);
- }
- }
- else
- ep->desc = "";
- /*
- * get next entry
- */
- last = ep;
- ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
- }
- if (last) /* append the new chain to the session chain */
- {
- last->next = 0;
- if (mp->magiclast)
- mp->magiclast->next = first;
- else
- mp->magic = first;
- mp->magiclast = last;
- }
- vmfree(mp->vm, ep); /* release the unused preallocated entry */
- if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
- {
- if (lev < 0)
- (*mp->disc->errorf)(mp, mp->disc, 1, "too many } operators");
- else if (lev > 0)
- (*mp->disc->errorf)(mp, mp->disc, 1, "not enough } operators");
- if (ret)
- (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a');
- }
- error_info.file = 0;
- error_info.line = 0;
- return 0;
- }
- /*
- * load a magic file into mp
- *
- * mp	magic session
- * file	magic file to load; if it is 0, empty, "-" or "." then the
- *	: separated list in the environment variable named by
- *	MAGIC_FILE_ENV is used, defaulting to MAGIC_FILE
- * flags	or'd with mp->disc->flags to form mp->flags
- *
- * in list mode an element that cannot be opened directly is looked up
- * with pathpath(), then under MAGIC_DIR if its name has no /, and is
- * skipped if it still cannot be found
- * an explicit file that cannot be opened is an error
- *
- * 0 is returned if at least one file was loaded, otherwise -1
- */
- int
- magicload(register Magic_t* mp, const char* file, unsigned long flags)
- {
- 	register char*	s;
- 	register char*	e;
- 	register char*	t;
- 	int		n;
- 	int		found;
- 	int		list;
- 	Sfio_t*		fp;
- 	mp->flags = mp->disc->flags | flags;
- 	found = 0;
- 	s = (char*)file;
- 	list = !s || !*s || ((*s == '-' || *s == '.') && !*(s + 1));
- 	if (list && (!(s = getenv(MAGIC_FILE_ENV)) || !*s))
- 		s = MAGIC_FILE;
- 	for (;;)
- 	{
- 		if (!list)
- 			e = 0;
- 		else if (e = strchr(s, ':'))
- 		{
- 			/*
- 			 * ok, so ~ won't work for the last list element
- 			 * we do it for MAGIC_FILES_ENV anyway
- 			 */
- 			if ((strneq(s, "~/", n = 2) || strneq(s, "$HOME/", n = 6) || strneq(s, "${HOME}/", n = 8)) && (t = getenv("HOME")))
- 			{
- 				sfputr(mp->tmp, t, -1);
- 				/*
- 				 * keep the / that follows the home prefix
- 				 */
- 				s += n - 1;
- 			}
- 			sfwrite(mp->tmp, s, e - s);
- 			if (!(s = sfstruse(mp->tmp)))
- 				goto nospace;
- 		}
- 		if (!*s || streq(s, "-"))
- 			s = MAGIC_FILE;
- 		if (!(fp = sfopen(NiL, s, "r")))
- 		{
- 			if (!list)
- 			{
- 				if (mp->disc->errorf)
- 					(*mp->disc->errorf)(mp, mp->disc, 3, "%s: cannot open magic file", s);
- 				return -1;
- 			}
- 			if (!(t = pathpath(mp->fbuf, s, "", PATH_REGULAR|PATH_READ)) && !strchr(s, '/'))
- 			{
- 				/*
- 				 * s may live in mp->tmp which is about to be reused
- 				 * so it is saved in mp->fbuf first; a name from the
- 				 * environment that does not fit is skipped instead
- 				 * of overflowing the buffer
- 				 */
- 				if (strlen(s) >= sizeof(mp->fbuf))
- 					goto next;
- 				strcpy(mp->fbuf, s);
- 				sfprintf(mp->tmp, "%s/%s", MAGIC_DIR, mp->fbuf);
- 				if (!(s = sfstruse(mp->tmp)))
- 					goto nospace;
- 				t = pathpath(mp->fbuf, s, "", PATH_REGULAR|PATH_READ);
- 			}
- 			/*
- 			 * t is 0 when no lookup found the file; never hand
- 			 * that to sfopen()
- 			 */
- 			if (!t || !(fp = sfopen(NiL, t, "r")))
- 				goto next;
- 		}
- 		found = 1;
- 		n = load(mp, s, fp);
- 		sfclose(fp);
- 		if (n && !list)
- 			return -1;
-  next:
- 		if (!e)
- 			break;
- 		s = e + 1;
- 	}
- 	if (!found)
- 	{
- 		if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
- 			(*mp->disc->errorf)(mp, mp->disc, 2, "cannot find magic file");
- 		return -1;
- 	}
- 	return 0;
-  nospace:
- 	if (mp->disc->errorf)
- 		(*mp->disc->errorf)(mp, mp->disc, 3, "out of space");
- 	return -1;
- }
- /*
- * open a magic session
- *
- * disc	discipline; it is dereferenced unconditionally and its flags
- *	and errorf are copied into the session
- *
- * a vmalloc region is opened and the Magic_t handle is allocated from
- * it; the regex discipline is set up to allocate from the same region
- * the info[] identifiers are entered into a dictionary keyed by name
- * mp->cctype[] is filled for every byte value with the CCTYPE() of that
- * byte mapped through each of the CC_MAPS code sets, CC_BIT bits per
- * code set with code set 0 in the low order bits
- *
- * the session handle is returned, 0 on failure
- */
- Magic_t*
- magicopen(Magicdisc_t* disc)
- {
- 	Vmalloc_t*	region;
- 	Magic_t*	mp;
- 	int		ch;
- 	int		set;
- 	int		bits;
- 	int		mapped;
- 	unsigned char*	map[CC_MAPS + 1];
- 	region = vmopen(Vmdcheap, Vmbest, 0);
- 	if (!region)
- 		return 0;
- 	mp = vmnewof(region, 0, Magic_t, 1, 0);
- 	if (!mp)
- 	{
- 		vmclose(region);
- 		return 0;
- 	}
- 	mp->id = lib;
- 	mp->disc = disc;
- 	mp->vm = region;
- 	mp->flags = disc->flags;
- 	/*
- 	 * regex discipline: allocate from the session region
- 	 */
- 	mp->redisc.re_version = REG_VERSION;
- 	mp->redisc.re_flags = REG_NOFREE;
- 	mp->redisc.re_errorf = (regerror_t)disc->errorf;
- 	mp->redisc.re_resizef = (regresize_t)vmgetmem;
- 	mp->redisc.re_resizehandle = (void*)mp->vm;
- 	/*
- 	 * identifier dictionary
- 	 */
- 	mp->dtdisc.key = offsetof(Info_t, name);
- 	mp->dtdisc.link = offsetof(Info_t, link);
- 	mp->tmp = sfstropen();
- 	if (!mp->tmp)
- 		goto bad;
- 	mp->infotab = dtnew(mp->vm, &mp->dtdisc, Dthash);
- 	if (!mp->infotab)
- 		goto bad;
- 	for (ch = 0; ch < elementsof(info); ch++)
- 		dtinsert(mp->infotab, &info[ch]);
- 	/*
- 	 * character class table
- 	 */
- 	for (set = 0; set < CC_MAPS; set++)
- 		map[set] = ccmap(set, CC_ASCII);
- 	mp->x2n = ccmap(CC_ALIEN, CC_NATIVE);
- 	for (ch = 0; ch <= UCHAR_MAX; ch++)
- 	{
- 		bits = 0;
- 		for (set = CC_MAPS - 1; set >= 0; set--)
- 		{
- 			mapped = ccmapchr(map[set], ch);
- 			bits = (bits << CC_BIT) | CCTYPE(mapped);
- 		}
- 		mp->cctype[ch] = bits;
- 	}
- 	return mp;
-  bad:
- 	magicclose(mp);
- 	return 0;
- }
- /*
- * close a magicopen() session
- */
- int
- magicclose(register Magic_t* mp)
- {
- if (!mp)
- return -1;
- if (mp->tmp)
-