cpdfscan.c | searchcode

/epstool-3.08/src/cpdfscan.c

#
C | 1885 lines | 1581 code | 154 blank | 150 comment | 502 complexity | 0f28b1229799617f443909ec4b52fe19 MD5 | raw file
Possible License(s): GPL-2.0

/* Copyright (C) 2002-2005 Ghostgum Software Pty Ltd.  All rights reserved.

  This software is provided AS-IS with no warranty, either express or
  implied.

  This software is distributed under licence and may not be copied,
  modified or distributed except as expressly authorised under the terms
  of the licence contained in the file LICENCE in this distribution.

  For more information about licensing, please refer to
  http://www.ghostgum.com.au/ or contact Ghostsgum Software Pty Ltd, 
  218 Gallaghers Rd, Glen Waverley VIC 3150, AUSTRALIA, 
  Fax +61 3 9886 6616.
*/

/* $Id: cpdfscan.c,v 1.7 2005/06/10 09:39:24 ghostgum Exp $ */
/* PDF scanner */

/* This is a rudimentary PDF scanner, intended to get
 * the page count, and for each page the Rotate, MediaBox 
 * and CropBox.
 */

#ifdef DEMO_PDFSCAN
# include <windows.h>
# include <stdio.h>
# include <stdarg.h>
# include <string.h>
# include <ctype.h>
# ifdef _MSC_VER
#  define vsnprintf _vsnprintf
# endif
# define csfopen fopen
# define cslen strlen
#else
# include "common.h"
# include <ctype.h>
#endif

#include "cpdfscan.h"


/* Limitations.
 * 
 * We currently load the entire xref table.  To minimise memory
 * we could instead keep a list of xref blocks, and do random
 * access within those.
 *
 * Memory management is very simple.  We just keep a linked
 * list of allocated blocks for composite objects.
 * We empty the stack, and free all PDF objects and composite
 * objects before returning to the caller.
 * We don't bother doing garbage collection.
 */


/* We keep a linked list of memory allocated for composite objects 
 * such as name, string, array or dict.
 */
typedef struct PDFMEM_s PDFMEM;
/* One node of the singly linked allocation list.  Each node owns
 * exactly one malloc'd data block.
 */
struct PDFMEM_s {
    void *ptr;		/* the allocated data block */
    int len;		/* size in bytes of the block at ptr */
    PDFMEM *next;	/* next node in the list, NULL for the last node */
};
 
/* The token scanner and object references understand the following types */
/* The numeric values are used as indexes into rtype_string[],
 * so the two must be kept in the same order.
 */
typedef enum rtype_e {
    invalidtype=0,
    marktype=1,
    nulltype=2,
    booltype=3,		/* uses boolval */
    integertype=4,	/* uses intval */
    realtype=5,		/* uses realval */
    nametype=6,		/* uses nameval */
    stringtype=7,	/* uses strval */
    arraytype=8,	/* uses arrayval */
    dicttype=9,		/* uses dictval */
    optype=10,		/* uses opval */
    streamtype=11,	/* uses streamval */
    objtype=12,		/* uses objval */
    commenttype=13
} rtype;

/* Printable name of each rtype value, indexed by the enum value.
 * Used when reporting type mismatches.
 */
const char *rtype_string[] = {
    "invalidtype", "marktype", "nulltype", "booltype", "integertype",
    "realtype", "nametype", "stringtype", "arraytype", "dicttype",
    "optype", "streamtype", "objtype", "commenttype"
};

/* A reference contains a simple object, or a pointer to 
 * a composite object.
 */
typedef struct ref_s ref;
struct ref_s {
    rtype type;		/* selects which member of value is meaningful */
    /* rsize depends on type: byte length for name and string,
     * element count for array, number of key/value pairs for dict,
     * and the generation number for an indirect object reference.
     * It is 0 for the simple types.
     */
    int rsize;
    union value_u {
	/* simple */
        void *voidval;
	BOOL boolval;
	int intval;
	float realval;
	/* composite */
	char *nameval;	/* not NUL terminated, length is rsize */
	char *strval;	/* not NUL terminated, length is rsize */
	ref *arrayval;	/* rsize elements */
	ref *dictval;	/* rsize pairs stored as key, value, key, value... */
	char *opval;
	/* simple */
	unsigned long streamval;	/* file offset of the stream data */
	int objval;	/* object number of an indirect reference */
    } value;
};

/* Cross reference table entry */
typedef struct PDFXREF_s {
    unsigned long offset;	/* file offset taken from the xref entry */
    int generation;		/* generation number taken from the xref entry */
    BOOL used;			/* TRUE for an "n" entry, FALSE for an "f" entry */
} PDFXREF;

/* Complete state of one PDF scanner instance: the open file and its
 * read buffer, the current token, the cross reference table, cached
 * document and page information, the composite object allocation
 * list, the operand stack and the object cache.
 */
struct PDFSCAN_s {
    void *handle;	/* passed back unchanged to print_fn */
    int (*print_fn)(void *handle, const char *ptr, int len);
    TCHAR filename[1024];
    FILE *file;
    char *buf;
    int buflen;		/* length of allocated buf */
    int len;		/* #bytes currently in buf */
    int offset;		/* file offset to start of buf */
    int begin;		/* offset in buf to start of token */
    int end;		/* offset in buf to end of token */
    rtype token_type;	/* token type */
    BOOL instream;	/* In a stream, looking for endstream */
    unsigned long xref_offset;	/* offset to xref table */
    PDFXREF *xref;	/* table indexed by object number */
    int xref_len;	/* number of entries in xref */

    /* Object numbers obtained during pdf_scan_open() */
    int root;		/* root object reference */
    int info;		/* document info dictionary reference */
    int pages;		/* Pages dictionary reference */
    int page_count;	/* number of pages */

    /* Cached page media */
    int pagenum;	/* -1 when nothing is cached */
    int rotate;
    PDFBBOX mediabox;
    PDFBBOX cropbox;

    /* memory allocation */
    PDFMEM *memory_head;
    PDFMEM *memory_tail;

    /* operand stack */
    /* Slot 0 is a permanently invalid sentinel; real items start at 1 */
    ref *ostack;
    int ostack_idx;	/* index to top of ostack */
    int ostack_len;	/* Initially 512 */
    int ostack_maxlen;	/* maximum depth of ostack */

    /* objects in memory */
    /* This contains pairs of integer & reference */
    ref *objs;
    int objs_count;	/* count of loaded objects */
    int objs_len;	/* length of objs */
    int objs_maxlen;	/* maximum number entries in objs */
};

/* Origin for pdf_scan_seek() */
typedef enum PDFSEEK_e {
    PDFSEEK_CUR,	/* relative to the end of the current token */
    PDFSEEK_END,	/* end of file; the offset argument is ignored */
    PDFSEEK_SET		/* absolute file offset */
} PDFSEEK;


/* Prototypes */
/* Forward declarations for functions used before they are defined */
static int pdf_scan_next_token(PDFSCAN *ps);
static int pdf_scan_read_trailer(PDFSCAN *ps, unsigned long *prev);
static int pdf_scan_read_xref(PDFSCAN *ps, unsigned long xref_offset);

static void clear_stack(PDFSCAN *ps);
static void clear_objs(PDFSCAN *ps);
static void pdf_scan_freeall(PDFSCAN *ps);
static void pdf_scan_cleanup(PDFSCAN *ps);
static int pdf_scan_open_file(PDFSCAN *ps);


/*****************************************************************/
/* text message output */

/* Write len bytes of str to the message sink.
 * The caller supplied print_fn callback is used when one is installed;
 * otherwise (no scanner, or no callback) the text goes to stdout.
 * Nothing is written when len is not positive.
 * Returns len unchanged.
 */
static int
pdf_scan_write(PDFSCAN *ps, const char *str, int len)
{
    if (len > 0) {
	if ((ps == NULL) || (ps->print_fn == NULL))
	    fwrite(str, 1, len, stdout);
	else
	    (*ps->print_fn)(ps->handle, str, len);
    }
    return len;
}

/* printf style message output through pdf_scan_write().
 * Messages longer than the internal 2048 byte buffer are truncated.
 * Returns the number of bytes actually written, or 0 if the message
 * could not be formatted.
 */
static int
pdf_scan_msgf(PDFSCAN *ps, const char *fmt, ...)
{
    va_list args;
    int count;
    char buf[2048];

    va_start(args,fmt);
    count = vsnprintf(buf, sizeof(buf), fmt, args);
    va_end(args);

    /* A negative result means the formatter failed (or, with some
     * older implementations, truncated without terminating), so the
     * buffer contents can't be trusted.
     */
    if (count < 0)
	return 0;
    /* C99 vsnprintf returns the length that would have been written;
     * only sizeof(buf)-1 bytes are really in the buffer.
     */
    if (count >= (int)sizeof(buf))
	count = (int)sizeof(buf) - 1;
    pdf_scan_write(ps, buf, count);
    return count;
}

/*****************************************************************/
/* memory allocation */

/* Release everything that only lives for the duration of one call:
 * close the file if it is open, empty the operand stack and the
 * object cache, and free all composite object memory.
 */
static void
pdf_scan_cleanup(PDFSCAN *ps)
{
    FILE *open_file = ps->file;

    if (open_file != NULL)
	fclose(open_file);
    ps->file = NULL;

    clear_stack(ps);
    clear_objs(ps);
    pdf_scan_freeall(ps);
}

/* Allocate len bytes, copy ptr[0..len) into them, and append the block
 * to the scanner's allocation list so that pdf_scan_freeall() releases it.
 * Returns the new copy, or NULL if len is negative or memory is exhausted.
 * A zero length request still yields a valid block: malloc(0) is allowed
 * to return NULL, and callers treat NULL as failure, which would make an
 * empty name or array look like an out of memory condition.
 */
static void *pdf_scan_alloc(PDFSCAN *ps, const void *ptr, int len)
{
    void *data;
    PDFMEM *mem;

    if (len < 0)
	return NULL;

    mem = (PDFMEM *)malloc(sizeof(PDFMEM));
    if (mem == NULL)
	return NULL;

    data = malloc(len ? (size_t)len : 1);
    if (data == NULL) {
	free(mem);
	return NULL;
    }

    mem->ptr = data;
    mem->next = NULL;
    mem->len = len;
    if (len)
	memcpy(data, ptr, len);

    /* append to the tail of the list */
    if (ps->memory_tail) {
	ps->memory_tail->next = mem;
	ps->memory_tail = mem;
    }
    else
	ps->memory_head = ps->memory_tail = mem;
    return data;
}

/* Free every block on the allocation list (the memory behind all
 * name, string, array and dict references) and leave the list empty.
 */
static void
pdf_scan_freeall(PDFSCAN *ps)
{
    PDFMEM *node;
    PDFMEM *following;

    for (node = ps->memory_head; node != NULL; node = following) {
	/* remember the link before the node itself is released */
	following = node->next;
	free(node->ptr);
	free(node);
    }
    ps->memory_head = NULL;
    ps->memory_tail = NULL;
}

/*****************************************************************/
/* Token checks */

/* Return TRUE if the current token is an operator whose text is
 * exactly str.
 */
static BOOL is_optoken(PDFSCAN *ps, const char *str)
{
    int toklen;

    if (ps->token_type != optype)
	return FALSE;
    toklen = ps->end - ps->begin;
    if (toklen != (int)strlen(str))
	return FALSE;
    return memcmp(ps->buf + ps->begin, str, toklen) == 0;
}

/* Check that the current token has the expected type.
 * Returns 0 if it does.  Otherwise reports the file offset, the
 * expected and actual types and the token text, and returns -1.
 */
static int
type_check(PDFSCAN *ps, rtype type)
{
    if (ps->token_type == type)
	return 0;

    /* offset and begin are int, so convert explicitly to match %ld */
    pdf_scan_msgf(ps, "Error at offset %ld.  Expecting %s and found %s\n",
	(long)(ps->offset + ps->begin), 
	rtype_string[(int)type],
	rtype_string[(int)ps->token_type]);
    pdf_scan_msgf(ps, "Token is \042");
    pdf_scan_write(ps, ps->buf+ps->begin, ps->end-ps->begin);
    pdf_scan_msgf(ps, "\042\n");
    return -1;
}

/* Check that the current token is the operator str.
 * Returns 0 if it is.  Otherwise reports what was expected and what
 * was found, and returns -1.
 */
static int
op_check(PDFSCAN *ps, const char *str)
{
    int code = type_check(ps, optype);
    if (code)
	return code;

    if (!is_optoken(ps, str)) {
	/* offset and begin are int, so convert explicitly to match %ld */
	pdf_scan_msgf(ps, 
	    "Error at offset %ld.  Expecting \042%s\042 and found \042",
	    (long)(ps->offset + ps->begin), str); 
	pdf_scan_write(ps, ps->buf+ps->begin, ps->end-ps->begin);
	pdf_scan_msgf(ps, "\042\n");
	code = -1;
    }
    return code;
}

/*****************************************************************/
/* stack */

/* Returned by stack and lookup functions to signal failure */
const ref invalidref = {invalidtype, 0, {NULL}};
/* A mark, as produced by the "[" and "<<" tokens */
const ref markref = {marktype, 0, {NULL}};

/* Push r onto the operand stack, growing the stack in steps of 256
 * entries up to ostack_maxlen.
 * Returns the new stack depth (>0) on success, 0 on overflow or
 * when out of memory.
 */
static int push_stack(PDFSCAN *ps, ref r)
{
    if (ps->ostack_idx + 1 >= ps->ostack_len) {
	/* no room left: move to a larger stack */
	const int grown = ps->ostack_len + 256;
	ref *bigger;

	if (grown > ps->ostack_maxlen) {
	    pdf_scan_msgf(ps, "push_stack: stack overflow\n");
	    return 0;
	}
	bigger = (ref *)malloc(grown * sizeof(ref));
	if (bigger == NULL) {
	    pdf_scan_msgf(ps, "push_stack: Out of memory\n");
	    return 0;
	}
	memcpy(bigger, ps->ostack, ps->ostack_len * sizeof(ref));
	free(ps->ostack);
	ps->ostack = bigger;
	ps->ostack_len = grown;
    }

    ps->ostack_idx += 1;
    ps->ostack[ps->ostack_idx] = r;
    return ps->ostack_idx;
}

/* Remove and return the top of the operand stack.
 * Reports an underflow and returns invalidref if the stack is empty.
 */
static ref pop_stack(PDFSCAN *ps)
{
    ref top;

    if (ps->ostack_idx > 0) {
	top = ps->ostack[ps->ostack_idx];
	ps->ostack_idx -= 1;
	return top;
    }
    pdf_scan_msgf(ps, "pop_stack: stack underflow\n");
    return invalidref;
}

/* Empty the operand stack.  Index 0 is the sentinel slot, so an
 * index of 0 means there are no items.
 */
static void clear_stack(PDFSCAN *ps)
{
    ps->ostack_idx = 0;
}

/* Return, without removing it, the item n places below the top of
 * the operand stack (n == 0 is the top).
 * Reports the problem and returns invalidref if n is negative or
 * the stack is not deep enough.
 */
static ref index_stack(PDFSCAN *ps, int n)
{
    if (n >= 0) {
	if (n < ps->ostack_idx)
	    return ps->ostack[ps->ostack_idx - n];
	pdf_scan_msgf(ps, "index_stack: stack isn't that deep\n");
    }
    else {
	pdf_scan_msgf(ps, "index_stack: index must not be negative\n");
    }
    return invalidref;
}

/* Return the top of the operand stack without removing it.
 * Reports the problem and returns invalidref if the stack is empty.
 */
static ref top_stack(PDFSCAN *ps)
{
    if (ps->ostack_idx > 0)
	return ps->ostack[ps->ostack_idx];

    pdf_scan_msgf(ps, "top_stack: stack is empty\n");
    return invalidref;
}

/*****************************************************************/
/* references */


/* Build an integer reference holding value */
static ref make_int(int value)
{
    ref result;

    result.rsize = 0;
    result.value.intval = value;
    result.type = integertype;
    return result;
}

/* Build a string reference from a private copy of the len bytes
 * at str.  Returns invalidref if the copy can't be allocated.
 */
static ref make_string(PDFSCAN *ps, const char *str, int len)
{
    ref result;
    char *copy = pdf_scan_alloc(ps, str, len);

    if (copy == NULL)
	return invalidref;
    result.type = stringtype;
    result.rsize = len;
    result.value.strval = copy;
    return result;
}

/* Build a name reference from a private copy of the len bytes
 * at str.  Returns invalidref if the copy can't be allocated.
 */
static ref make_name(PDFSCAN *ps, const char *str, int len)
{
    ref result;
    char *copy = pdf_scan_alloc(ps, str, len);

    if (copy == NULL)
	return invalidref;
    result.type = nametype;
    result.rsize = len;
    result.value.nameval = copy;
    return result;
}

/* Return TRUE if r is a name reference whose text is exactly name */
static BOOL nameref_equals(ref *r, const char *name)
{
    const int wanted = (int)strlen(name);

    if ((r->type == nametype) && (r->rsize == wanted))
	return (memcmp(r->value.nameval, name, wanted) == 0);
    return FALSE;
}

/* Look up the key name in the dictionary on top of the operand stack.
 * Returns the matching value without pushing it.
 * Returns invalidref if the stack is empty, if the top of the stack
 * is not a dictionary, or if the key is not present.
 */
static ref dict_get(PDFSCAN *ps, const char *name)
{
    int namelen = (int)strlen(name);
    ref dict = top_stack(ps);
    ref *r;
    int dictlen;
    int i;
    /* Anything other than a dictionary has a different meaning for
     * rsize and value, and must not be walked as key/value pairs.
     */
    if (dict.type != dicttype)
	return invalidref;
    if (dict.value.dictval == NULL)
	return invalidref;	/* empty dictionary */
    dictlen = dict.rsize * 2;
    for (i = 0; i<dictlen; i+=2) {
	r = &dict.value.dictval[i];
	if ((r->rsize == namelen) && (r->type == nametype) &&
	    (memcmp(r->value.nameval, name, namelen) ==0))
	    return dict.value.dictval[i+1];
    }
    return invalidref;
}

/* Replace the topmost mark and everything above it on the operand
 * stack with a single array holding those items.
 * Returns the array, or invalidref (leaving the stack untouched) if
 * there is no mark or the array can't be allocated.
 */
static ref array_to_mark(PDFSCAN *ps)
{
    ref result;
    int mark;
    int count;

    /* search downwards for the nearest mark */
    for (mark = ps->ostack_idx; mark > 0; mark--) {
	if (ps->ostack[mark].type == marktype)
	    break;
    }
    if (mark == 0) {
	pdf_scan_msgf(ps, "array_to_mark: no mark on stack\n");
	return invalidref;
    }

    count = ps->ostack_idx - mark;
    result.type = arraytype;
    result.rsize = count;
    result.value.arrayval = NULL;
    if (count != 0) {
	ref *items = pdf_scan_alloc(ps, &ps->ostack[mark+1], 
	    count * sizeof(ref));
	if (items == NULL)
	    return invalidref;
	result.value.arrayval = items;
    }

    /* drop the items and the mark, then push the array */
    ps->ostack_idx -= count + 1;
    push_stack(ps, result);
    return result;
}

/* Replace the topmost mark and everything above it on the operand
 * stack with a single dictionary holding those items as key/value
 * pairs.
 * Returns the dictionary, or invalidref (leaving the stack untouched)
 * if there is no mark, the item count is odd, or the dictionary
 * can't be allocated.
 */
static ref dict_to_mark(PDFSCAN *ps)
{
    ref result;
    int mark;
    int count;

    /* search downwards for the nearest mark */
    for (mark = ps->ostack_idx; mark > 0; mark--) {
	if (ps->ostack[mark].type == marktype)
	    break;
    }
    if (mark == 0) {
	pdf_scan_msgf(ps, "dict_to_mark: no mark on stack\n");
	return invalidref;
    }

    count = ps->ostack_idx - mark;
    if ((count & 1) != 0) {
	pdf_scan_msgf(ps, "dict_to_mark: must have name/value pairs\n");
	return invalidref;
    }

    result.type = dicttype;
    result.rsize = count / 2;
    result.value.arrayval = NULL;
    if (count != 0) {
	ref *pairs = pdf_scan_alloc(ps, &ps->ostack[mark+1], 
	    count * sizeof(ref));
	if (pairs == NULL)
	    return invalidref;
	result.value.arrayval = pairs;
    }

    /* drop the items and the mark, then push the dictionary */
    ps->ostack_idx -= count + 1;
    push_stack(ps, result);
    return result;
}

/*****************************************************************/

/* Convert the current token to a reference and push it on the
 * operand stack.
 * Returns the reference that was pushed.  A token that can't be
 * converted is pushed as an invalid reference.  Token types that are
 * only ever produced by operators (stream, comment, obj, op, array,
 * dict) are not pushed at all and invalidref is returned.
 */
static ref push_token(PDFSCAN *ps)
{
    ref r;
    int len = ps->end - ps->begin;
    const char *p = ps->buf + ps->begin;
    r.type = ps->token_type;
    r.rsize = 0;
    r.value.voidval = NULL;
    switch(r.type) {
      case invalidtype:
	break;
      case marktype:
	break;
      case nulltype:
	break;
      case booltype:
	if ((len == 4) && (memcmp(p, "true", 4)==0))
	    r.value.boolval = TRUE;
	else if ((len == 5) && (memcmp(p, "false", 5)==0))
	    r.value.boolval = FALSE;
	else
	    r = invalidref;
	break;
      case integertype:
	{   char buf[64];
	    /* the token isn't NUL terminated, so copy it first */
	    if (len > (int)sizeof(buf)-1)
		r = invalidref;
	    else {
		memcpy(buf, p, len);
		buf[len] = '\0';
		r.value.intval = atoi(buf);
	    }
	}
	break;
      case realtype:
	{   char buf[64];
	    /* the token isn't NUL terminated, so copy it first */
	    if (len > (int)sizeof(buf)-1)
		r = invalidref;
	    else {
		memcpy(buf, p, len);
		buf[len] = '\0';
		r.value.realval = (float)atof(buf);
	    }
	}
	break;
      case nametype:
	/* skip the leading '/' */
	r = make_name(ps, p+1, len-1);
	break;
      case stringtype:
	/* the delimiters are kept as part of the string */
	r = make_string(ps, p, len);
	break;
      case streamtype:
      case commenttype:
      case objtype:
      case optype:
      case arraytype:
      case dicttype:
	/* Can't push these from a token */
	/* These are made by operators like stream, R, ], >> */
	return invalidref;
      default:
	r.type = invalidtype;
	break;
    }
    push_stack(ps, r);
    return r;
}

/* Act on the current token if it is one of the operators the scanner
 * understands.
 * Returns 1 if the token is not an operator at all, 0 if the operator
 * was handled, and -1 (after reporting it) if it is not recognised.
 */
static int process_op(PDFSCAN *ps)
{
    ref item;

    if (ps->token_type != optype)
	return 1;	/* not an op */

    if (is_optoken(ps, "R")) {
	/* "objnum generation R" becomes one indirect reference */
	ref objnum = index_stack(ps, 1);
	item = top_stack(ps);
	if ((item.type == integertype) && (objnum.type == integertype)) {
	    item.type = objtype;
	    item.rsize = item.value.intval;	/* generation */
	    item.value.intval = objnum.value.intval;
	    pop_stack(ps);
	    pop_stack(ps);
	    push_stack(ps, item);
	}
	return 0;
    }

    if (is_optoken(ps, "]")) {
	array_to_mark(ps);
	return 0;
    }

    if (is_optoken(ps, ">>")) {
	dict_to_mark(ps);
	return 0;
    }

    if (is_optoken(ps, "null")) {
	item.type = nulltype;
	item.rsize = 0;
	item.value.voidval = NULL;
	push_stack(ps, item);
	return 0;
    }

    if (is_optoken(ps, "obj")) {
	/* nothing to do apart from saying so */
	pdf_scan_msgf(ps, "ignoring obj token\n");
	return 0;
    }

    if (is_optoken(ps, "endobj")) {
	/* nothing to do apart from saying so */
	pdf_scan_msgf(ps, "ignoring endobj token\n");
	return 0;
    }

    if (is_optoken(ps, "stream")) {
	/* the stream reference records where the stream data starts */
	item.type = streamtype;
	item.rsize = 0;
	item.value.streamval = ps->offset + ps->end;
	push_stack(ps, item);
	/* reading the next token skips over the stream data */
	pdf_scan_next_token(ps);
	return 0;
    }

    pdf_scan_msgf(ps, "process_op: unrecognised operator \042");
    pdf_scan_write(ps, ps->buf+ps->begin, ps->end-ps->begin);
    pdf_scan_msgf(ps, "\042\n");
    return -1;
}

/*****************************************************************/
/* Debugging and error messages */

#ifdef NOTUSED

/* Print a one line summary of a reference.
 * Composite values are summarised, not expanded.
 * Returns the number of characters written.
 */
static int
print_ref(PDFSCAN *ps, ref *r)
{
    int n = 0;
    switch(r->type) {
      case invalidtype:
	n = pdf_scan_msgf(ps, "--invalid--");
	break;
      case marktype:
	n = pdf_scan_msgf(ps, "--mark--");
	break;
      case nulltype:
	n = pdf_scan_msgf(ps, "--null--");
	break;
      case booltype:
	n = pdf_scan_msgf(ps, "%s", r->value.boolval ? "true" : "false");
	break;
      case integertype:
	n = pdf_scan_msgf(ps, "%d", r->value.intval);
	break;
      case realtype:
	n = pdf_scan_msgf(ps, "%g", r->value.realval);
	break;
      case nametype:
	n = pdf_scan_write(ps, "/", 1);
	/* count the name text too, so callers track the column correctly */
	n += pdf_scan_write(ps, r->value.nameval, r->rsize);
	break;
      case stringtype:
	n = pdf_scan_write(ps, "(", 1);
	n += pdf_scan_write(ps, r->value.strval, r->rsize);
	n += pdf_scan_write(ps, ")", 1);
	break;
      case streamtype:
	/* streamval is unsigned long */
	n = pdf_scan_msgf(ps, "--stream:%lu--", r->value.streamval);
	break;
      case commenttype:
	n = pdf_scan_msgf(ps, "--comment--");
	break;
      case objtype:
	n = pdf_scan_msgf(ps, "--obj:%d--", r->value.objval);
	break;
      case optype:
	n = pdf_scan_msgf(ps, "--op:");
	n += pdf_scan_write(ps, r->value.opval, r->rsize);
	n += pdf_scan_write(ps, "--", 2);
	break;
      case arraytype:
	n = pdf_scan_msgf(ps, "--array:%d--", r->rsize);
	break;
      case dicttype:
	n = pdf_scan_msgf(ps, "--dict:%d--", r->rsize);
	break;
      default:
	n = pdf_scan_msgf(ps, "--unknown--");
	break;
    }
    return n;
}

/* Print a reference, listing the elements of an array or the
 * key/value pairs of a dictionary one level deep.
 * Returns the number of characters written.
 */
static int
print_ref_expand(PDFSCAN *ps, ref *r)
{
    int written = 0;
    int k;

    switch (r->type) {
      case arraytype:
	written += pdf_scan_msgf(ps, "[ ");
	for (k = 0; k < r->rsize; k++) {
	    written += print_ref(ps, &r->value.arrayval[k]);
	    written += pdf_scan_msgf(ps, " ");
	}
	written += pdf_scan_msgf(ps, "]");
	break;
      case dicttype:
	written += pdf_scan_msgf(ps, "<< ");
	for (k = 0; k < r->rsize; k++) {
	    ref *pair = &r->value.dictval[2 * k];
	    written += print_ref(ps, &pair[0]);	/* key */
	    written += pdf_scan_msgf(ps, " ");
	    written += print_ref(ps, &pair[1]);	/* value */
	    written += pdf_scan_msgf(ps, " ");
	}
	written += pdf_scan_msgf(ps, ">>");
	break;
      default:
	written += print_ref(ps, r);
	break;
    }
    return written;
}

/* Print a compact listing of the operand stack, bottom first,
 * starting a new line once a line has passed column 70.
 */
static void
print_stack(PDFSCAN *ps)
{
    const int depth = ps->ostack_idx;
    int column = 0;
    int k = 1;

    pdf_scan_msgf(ps, "Stack: ");
    while (k <= depth) {
	column += print_ref(ps, &ps->ostack[k]);
	if (column <= 70) {
	    column += pdf_scan_msgf(ps, " ");
	}
	else {
	    pdf_scan_msgf(ps, "\n");
	    column = 0;
	}
	k++;
    }
    pdf_scan_msgf(ps, "\n");
}

/* Print the operand stack, bottom first, one numbered item per line
 * with arrays and dictionaries expanded.
 */
static void
print_stack_expand(PDFSCAN *ps)
{
    const int depth = ps->ostack_idx;
    int k = 1;

    pdf_scan_msgf(ps, "Stack:\n");
    while (k <= depth) {
	pdf_scan_msgf(ps, "%2d: ", k);
	print_ref_expand(ps, &ps->ostack[k]);
	pdf_scan_msgf(ps, "\n");
	k++;
    }
}

/* Report how much memory the allocation list currently holds,
 * counting both the data blocks and the list nodes.
 */
static void pdf_scan_print_allocated(PDFSCAN *ps)
{
    PDFMEM *node;
    int blocks = 0;
    int bytes = 0;

    for (node = ps->memory_head; node != NULL; node = node->next) {
	bytes += sizeof(PDFMEM);
	bytes += node->len;
	blocks++;
    }
    pdf_scan_msgf(ps, "Allocated memory %d bytes in %d objects\n", 
	bytes, blocks);
}

#endif

/*****************************************************************/
/* object reading and cache */

/* Add an object to the cache as an (object number, reference) pair,
 * growing the cache in steps of 256 entries up to objs_maxlen.
 * Returns the new entry count (>0) on success, 0 if the cache is
 * full or memory is exhausted.
 */
static int obj_add(PDFSCAN *ps, int objnum, ref objref)
{
    if (ps->objs_count + 2 >= ps->objs_len) {
	/* no room for another pair: move to a larger table */
	const int grown = ps->objs_len + 256;
	ref *bigger;

	if (grown > ps->objs_maxlen) {
	    pdf_scan_msgf(ps, "obj_add: too many objects to cache\n");
	    return 0;
	}
	bigger = (ref *)malloc(grown * sizeof(ref));
	if (bigger == NULL) {
	    pdf_scan_msgf(ps, "obj_add: Out of memory\n");
	    return 0;
	}
	memcpy(bigger, ps->objs, ps->objs_len * sizeof(ref));
	free(ps->objs);
	ps->objs = bigger;
	ps->objs_len = grown;
    }

    ps->objs[ps->objs_count] = make_int(objnum);
    ps->objs[ps->objs_count + 1] = objref;
    ps->objs_count += 2;
    return ps->objs_count;
}

/* Look up an object in the cache by object number.
 * Returns the cached reference, or invalidref if it isn't cached.
 */
static ref obj_find(PDFSCAN *ps, int objnum)
{
    int slot = 0;

    /* entries are stored as pairs: number at slot, value at slot+1 */
    while (slot < ps->objs_count) {
	if (ps->objs[slot].value.intval == objnum)
	    return ps->objs[slot + 1];
	slot += 2;
    }
    return invalidref;
}

/* Forget all cached objects.  The table itself stays allocated. */
static void clear_objs(PDFSCAN *ps)
{
    ps->objs_count = 0;
}

/*****************************************************************/
/* token parsing */

/* Return 1 if ch is a white space character (NUL, tab, line feed,
 * form feed, carriage return or space), otherwise 0.
 */
static int is_white(char ch)
{
    switch (ch) {
      case '\0':
      case '\t':
      case '\n':
      case '\f':
      case '\r':
      case ' ':
	return 1;
      default:
	return 0;
    }
}

/* Return 1 if ch is one of the delimiter characters
 * ( ) < > [ ] { } / %, otherwise 0.
 */
static int is_delimiter(char ch)
{
    switch (ch) {
      case '(': case ')':
      case '<': case '>':
      case '[': case ']':
      case '{': case '}':
      case '/':
      case '%':
	return 1;
      default:
	return 0;
    }
}


/* Scan next token from buffer, returning token type and offset to begin 
 * and end of token.
 * Leading white space is skipped.  A token is only accepted once the
 * character that ends it has been seen, so a token that runs up to
 * the end of the buffer is reported as "not enough data".
 * Return 0 if OK, 1 if no token or not enough data, -1 on error
 */
static int pdf_scan_token(const char *buf, int buflen, 
    rtype *ttype, int *tbegin, int *tend)
{
    int code = -1;
    int i = 0;
    rtype type;
    int begin, end;
    *ttype = type = invalidtype;
    *tbegin = begin = 0;
    *tend = end = 0;
    while ((i < buflen) && is_white(buf[i]))
	i++;
    if (i == buflen)
	return 1;

    begin = i;
    if (buf[i] == '%') {
	/* comment, runs to end of line */
	while (i < buflen) {
	    if ((buf[i] == '\n') || (buf[i] == '\r')) {
		type = commenttype;
		end = i;
		code = 0;
		break;
	    }
	    i++;
	}
        if (i >= buflen)
	    code = 1;

    }
    else if (buf[i] == '(') {
	/* string */
	int pcount = 0;		/* depth of nested parentheses */
	type = stringtype;
	i++;
	while (i < buflen) {
	    if (buf[i] == '\\')
		i++;		/* skip the escaped character */
	    else if (buf[i] == '(')
		pcount++;
	    else if (buf[i] == ')') {
		if (pcount <= 0) {
		    end = i+1;
		    code = 0;
		    break;
		}
		else
		    pcount--;
	    }
	    i++;
	}
	if (i >= buflen)
	    code = 1;
    }
    else if (buf[i] == '<') {
	i++;
	if (i >= buflen) {
	    code = 1;
	}
	else if (buf[i] == '<') {
	    /* marktype */
	    end = i+1;
	    type = marktype;
	    code = 0;
	}
	else {
	    /* hexadecimal string */
	    type = stringtype;
	    while (i < buflen) {
		if (buf[i] == '>') {
		    end = i+1;
		    code = 0;
		    break;
		}
		i++;
	    }
	    if (i >= buflen)
		code = 1;
	}
    }
    else if (buf[i] == '[') {
	/* start of array is also a mark */
	code = 0;
	end = i+1;
	type = marktype;
    }
    else if (buf[i] == '/') {
	/* name */
	type = nametype;
	i++;
	while (i < buflen) {
	    if (is_white(buf[i]) || is_delimiter(buf[i])) {
		end = i;
		code = 0;
		break;
	    }
	    i++;
	}
	if (i >= buflen)
	    code = 1;
    }
    else if (is_delimiter(buf[i])) {
	/* skip over delimiter */
	if (buf[i] == '>') {
	    /* only ">>" is valid here, a lone '>' is an error */
	    i++;
	    if (i < buflen) {
		if (buf[i] == '>') {
		    type = optype;
		    end = i+1;
		    code = 0;
		}
		else
		    code = -1;
	    }
	}
	else {
	    type = optype;
	    end = i+1;
	    code = 0;
	}
	if (i >= buflen)
	    code = 1;
    }
    else {
	/* First assume that it is an op */
	type = optype;
	while (i < buflen) {
	    if (is_white(buf[i]) || is_delimiter(buf[i])) {
		end = i;
		code = 0;
		break;
	    }
	    i++;
	}
	if (i >= buflen)
	    code = 1;

	/* try to convert it into a bool */
	if ((code == 0) && (type == optype)) {
	    if ((end - begin == 4) && 
		(memcmp(buf+begin, "true", 4) == 0)) {
		type = booltype;
	    }
	    else if ((end - begin == 5) && 
		(memcmp(buf+begin, "false", 5) == 0)) {
		type = booltype;
	    }
	}

	/* try to convert it into an integer */
	if ((code == 0) && (type == optype)) {
	    int j;
	    char ch;
	    BOOL isreal = FALSE;
	    BOOL isnum = TRUE;
	    for (j=begin; j<end; j++) {
		ch = buf[j];
		if (ch == '.')
		    isreal = TRUE;
		/* isdigit() is only defined for unsigned char values
		 * and EOF, and the buffer may hold bytes >= 0x80.
		 */
		if (!((ch == '-') || (ch == '+') || (ch == '.') || 
		    isdigit((unsigned char)ch)))
		    isnum = FALSE;
	    }
	    if (isnum) {
		if (isreal)
		    type = realtype;
		else
		    type = integertype;
	    }
	}
    }

    *ttype = type;
    *tbegin = begin;
    *tend = end;
    return code;
}

/*****************************************************************/

/* Close the file, free the token buffer, the xref table, the operand
 * stack and the object cache, then reset the whole scanner structure
 * to zero.
 */
static void pdf_scan_finish(PDFSCAN *ps)
{
    if (ps->file != NULL)
	fclose(ps->file);

    /* free() accepts NULL, so unallocated members need no test */
    free(ps->buf);
    free(ps->xref);
    free(ps->ostack);
    free(ps->objs);

    /* this clears every pointer, length and index in one go */
    memset(ps, 0, sizeof(PDFSCAN));
}

/* Open ps->filename for binary reading and store the stream in
 * ps->file.  Returns 0 on success, -1 if the file can't be opened.
 */
static int pdf_scan_open_file(PDFSCAN *ps)
{
    ps->file = csfopen(ps->filename, TEXT("rb"));
    return (ps->file != NULL) ? 0 : -1;
}

/* Prepare a scanner for the file called name: remember the name,
 * open the file and allocate the token buffer, the operand stack and
 * the object cache.
 * Returns 0 on success, -1 if the name is too long or the file can't
 * be opened, -2 if out of memory (the scanner is then reset by
 * pdf_scan_finish()).
 */
static int pdf_scan_init(PDFSCAN *ps, const TCHAR *name)
{
    /* size of the name in bytes, including the terminator */
    int namebytes = (int)(cslen(name)+1) * sizeof(TCHAR);

    if (namebytes > (int)sizeof(ps->filename))
	return -1;
    memcpy(ps->filename, name, namebytes);
    if (pdf_scan_open_file(ps) != 0) 
	return -1;

    /* token buffer */
    ps->buflen = 256;
    ps->buf = (char *)malloc(ps->buflen);
    if (ps->buf == NULL)
	goto nomem;

    /* operand stack, initially empty */
    ps->ostack_maxlen = 4096;
    ps->ostack_len = 256;
    ps->ostack_idx = 0;
    ps->ostack = (ref *)malloc(ps->ostack_len * sizeof(ref));
    if (ps->ostack == NULL)
	goto nomem;
    /* slot 0 is never used for data, keep it invalid */
    ps->ostack[0].type = invalidtype;
    ps->ostack[0].rsize = 0;
    ps->ostack[0].value.voidval = NULL;

    /* object cache, initially empty */
    ps->objs_maxlen = 1024;
    ps->objs_len = 256;
    ps->objs_count = 0;
    ps->objs = (ref *)malloc(ps->objs_len * sizeof(ref));
    if (ps->objs == NULL)
	goto nomem;

    ps->pagenum = -1;	/* no cached media info yet */
    return 0;

nomem:
    pdf_scan_finish(ps);
    return -2;
}

/* Reposition the file and discard the buffered data.
 * PDFSEEK_SET seeks to the absolute file offset, PDFSEEK_CUR to offset
 * bytes from the end of the current token, and PDFSEEK_END to the end
 * of the file (offset is ignored).
 * Returns the fseek() result, or -1 for an unknown whence.
 */
static int pdf_scan_seek(PDFSCAN *ps, long offset, PDFSEEK whence)
{
    int status = -1;

    if (whence == PDFSEEK_END) {
	status = fseek(ps->file, 0, SEEK_END);
	ps->begin = ps->end = ps->len = 0;
	ps->offset = ftell(ps->file);
    }
    else if ((whence == PDFSEEK_CUR) || (whence == PDFSEEK_SET)) {
	/* a relative seek is converted to an absolute one */
	if (whence == PDFSEEK_CUR)
	    offset = ps->offset + ps->end + offset;
	ps->begin = ps->end = ps->len = 0;
	status = fseek(ps->file, offset, SEEK_SET);
	ps->offset = offset;
    }
    return status;
}

/* Read next token from PDF file */
/* Return 0 if OK, -1 if EOF or if pdf_scan_token() reports a bad */
/* token, -2 if out of memory */
/* On success ps->begin, ps->end and ps->token_type describe the token. */
/* If the token is "stream", ps->instream is set so that the following */
/* call skips forward to "endstream" before scanning again. */
static int pdf_scan_next_token(PDFSCAN *ps)
{
    int code = 0;
    int count;
    rtype type=invalidtype;
    int begin=0, end=0;

    /* Each pass tries to scan a token from the unread part of the
     * buffer.  code == 1 means more data is needed, so the next pass
     * first makes room and reads more of the file.
     */
    do {
	if ((code == 1) && ps->end) {
	    /* move characters to front of buffer */
	    if (ps->len - ps->end)
		memmove(ps->buf, ps->buf+ps->end, ps->len - ps->end);
	    ps->offset += ps->end;
	    ps->len = ps->len - ps->end;
	    ps->begin = 0;
	    ps->end = 0;
	}

	if ((code == 1) && (ps->len >= ps->buflen)) {
	    /* increase buffer size */
	    /* the buffer is full and still holds no complete token */
	    char *newbuf;
	    int newbuflen = 2 * ps->buflen;
	    newbuf = (char *)malloc(newbuflen);
	    if (newbuf) {
		memcpy(newbuf, ps->buf, ps->buflen);
		free(ps->buf);
		ps->buf = newbuf;
		ps->buflen = newbuflen;
	    }
	    else {
		pdf_scan_msgf(ps, "Out of memory in pdf_scan_next_token\n");
		pdf_scan_msgf(ps, "Tried to realloc %d to %d\n",
		    ps->buflen, newbuflen);
		code = -2;
		break;
	    }
	}

	if ((code == 1) || (ps->len == 0)) {
	    /* fill the free space at the end of the buffer */
	    count = (int)fread(ps->buf+ps->len, 1, ps->buflen-ps->len, 
		ps->file);
	    if (count == 0) {
		pdf_scan_msgf(ps, "EOF in pdf_scan_next_token\n");
		code = -1;
		break;
	    }
	    ps->len += count;
	}

	while (ps->instream) {
	    /* We are in a stream.  Keep reading until we find
	     * the endstream.  This isn't robust. It can be fooled 
	     * by "endstream" occurring within a stream.
	     */
	    while ((ps->end < ps->len) && (ps->buf[ps->end] != 'e'))
		ps->end++;
	    /* look for endstream */
	    if (ps->end + 9 >= ps->len) {
		code = 1;	/* need more */
		break;
	    }
	    if (memcmp(ps->buf+ps->end, "endstream", 9) == 0)
		ps->instream = FALSE;
	    else
		ps->end++;
	}
	if (!ps->instream)
	    code = pdf_scan_token(ps->buf+ps->end, ps->len - ps->end, 
		&type, &begin, &end);
    } while (code == 1);


    if (code == 0) {
	/* got a token */
	/* begin and end are relative to the old ps->end */
	ps->begin = ps->end + begin;
	ps->end = ps->end + end;
	ps->token_type = type;

	if ((type == optype) && (ps->end-ps->begin == 6) &&
		(memcmp(ps->buf+ps->begin, "stream", 6) == 0))
	    ps->instream = TRUE;
    }

    return code;
}

/*****************************************************************/
/* Reading %%EOF, xref, trailer */

/* Find the start of the line that ends just before str[len].
 * Any end of line characters immediately before str[len] are skipped
 * first, so len is normally the index of the start of the following
 * line.
 * Returns the index of the first character of that line, or -1 if
 * len is not positive or no end of line precedes the line (which
 * includes the case where the line starts at index 0).
 */
static int
previous_line(const char *str, int len)
{
    int i = len-1;
    /* nothing before the start of the buffer */
    if (len <= 0)
	return -1;
    /* first skip over EOL */
    while ((i > 0) && ((str[i]=='\r') || (str[i]=='\n')))
	i--;
    /* then back over the line itself to the preceding EOL */
    while ((i > 0) && !((str[i]=='\r') || (str[i]=='\n')))
	i--;
    if (!((str[i]=='\r') || (str[i]=='\n')))
	return -1; /* didn't find a line */
    return i+1;
}

/* Locate the cross reference table by examining the end of the file.
 * The last (up to) 4096 bytes are searched backwards for "%%EOF"; the
 * line before it gives the table offset, which is stored in
 * ps->xref_offset, and the line before that must be "startxref".
 * The file is left positioned at offset 0.
 * Returns 0 on success, -1 on failure.
 */
static int
pdf_scan_find_xref(PDFSCAN *ps)
{
    char buf[4096];
    int i, j;
    int code = -1;
    int count;
    /* seeking to the end sets ps->offset to the file length */
    pdf_scan_seek(ps, 0, PDFSEEK_END);
    count = min((int)sizeof(buf), ps->offset);
    pdf_scan_seek(ps, -count, PDFSEEK_CUR);
    count = (int)fread(buf, 1, sizeof(buf), ps->file);
    pdf_scan_seek(ps, 0, PDFSEEK_SET);
    if (count == 0)
	return -1;
    /* search backwards from the last place "%%EOF" could start */
    i = count - 5;
    while (i > 0) {
	/* Find %%EOF */
	if (memcmp(buf+i, "%%EOF", 5) == 0) {
	    code = 0;
	    break;
	}
	i--;
    }
    if (i == 0) {
	/* the text is used as a format string, so this prints "%EOF" */
	pdf_scan_msgf(ps, "Failed to find %%EOF\n");
	code = -1;
    }
    if (code == 0) {
	/* Look for xref table offset */
	j = previous_line(buf, i);
	if (j >= 0)
	    ps->xref_offset = atol(buf+j);
	else 
	    code = -1;
	/* continue the backwards search from the offset line */
	i = j;
	if (ps->xref_offset == 0)
	    code = -1;
	if (code != 0)
	    pdf_scan_msgf(ps, "Failed to find cross reference table\n");
    }

    if (code == 0) {
	/* Look for "startxref" */
	j = previous_line(buf, i);
	if (j >= 0) {
	    if (memcmp(buf+j, "startxref", 9) != 0)
		code = -1;
	}
	else {
	    code = -1;
	}
	if (code != 0)
	    pdf_scan_msgf(ps, "Failed to find startxref\n");
    }
    return code;
}

/* Read a cross reference table */
/* This is called for each cross reference table */
/* Reads the table at xref_offset into ps->xref, enlarging it as
 * needed, then reads the trailer that follows and, if the trailer
 * has a /Prev entry, reads the older table it points to.
 * Entries already marked as used by a newer table are not replaced.
 * Returns 0 on success and non-zero on failure: the first error
 * stops the read, and a subsection header with a negative or
 * oversized range is rejected.
 */
static int
pdf_scan_read_xref(PDFSCAN *ps, unsigned long xref_offset)
{
    int code;
    int i;
    int first = 0;
    int count = 0;
    unsigned long prev = 0;
    unsigned long offset = 0;
    int generation = 0;
    BOOL used = FALSE;
    /* largest int, and the largest table that can be sized in an int */
    const int int_max = (int)(((unsigned int)~0U) >> 1);
    const int max_entries = int_max / (int)sizeof(PDFXREF);

    pdf_scan_seek(ps, xref_offset, PDFSEEK_SET);
    code = pdf_scan_next_token(ps);
    if (code == 0)
	code = op_check(ps, "xref");
    while (code == 0) {
	/* each subsection starts with "first count" */
        code = pdf_scan_next_token(ps);
        if ((code == 0) && is_optoken(ps, "trailer"))
	    break;	/* finished this xref table */
	if (code == 0) {
	    first = atoi(ps->buf + ps->begin);
            code = pdf_scan_next_token(ps);
	}
	if (code != 0)
	    break;	/* don't read entries after a failure */
	count = atoi(ps->buf + ps->begin);

	/* The numbers come from the file.  Reject values that would
	 * index before the table or overflow the size calculation.
	 */
	if ((first < 0) || (count < 0) || (first > max_entries) ||
	    (count > max_entries - first)) {
	    pdf_scan_msgf(ps, 
		"pdf_scan_read_xref: invalid subsection %d %d\n",
		first, count);
	    code = -1;
	    break;
	}

	/* make sure there is enough space in the table */
	if (first + count > ps->xref_len) {
	    int len = (first + count) * (int)sizeof(PDFXREF);
	    PDFXREF *newxref = (PDFXREF *)malloc(len);
	    if (newxref) {
		memset(newxref, 0, len);
		/* there is nothing to copy for the first table */
		if (ps->xref && (ps->xref_len > 0))
		    memcpy(newxref, ps->xref, 
			ps->xref_len * sizeof(PDFXREF));
		free(ps->xref);
		ps->xref = newxref;
		ps->xref_len = first + count;
	    }
	    else {
		pdf_scan_msgf(ps, "pdf_scan_read_xref: out of memory\n");
		code = -2;
		break;
	    }
	}

	/* each entry is "offset generation n" or "offset generation f" */
	for (i=first; (code == 0) && (i<first+count); i++) {
            code = pdf_scan_next_token(ps);
	    if (code == 0) {
		offset = atol(ps->buf+ps->begin);
                code = pdf_scan_next_token(ps);
	    }
	    if (code == 0) {
		generation = atoi(ps->buf+ps->begin);
                code = pdf_scan_next_token(ps);
	    }
	    if (code == 0) {
		if (is_optoken(ps, "n"))
		    used = TRUE;
		else if (is_optoken(ps, "f"))
		    used = FALSE;
		else
		    code = -1;
	    }
	    /* We don't deal correctly with generation.
	     * We assume that the first xref table that marks an
	     * object as used is the definitive reference.
	     */
	    if (code == 0) {
		if (!(ps->xref[i].used)) {
		    ps->xref[i].offset = offset;
		    ps->xref[i].generation = generation;
		    ps->xref[i].used = used;
		}
	    }
	}
    }

    if (code == 0) {
	code = pdf_scan_read_trailer(ps, &prev);
	if ((code == 0) && prev && prev != ps->xref_offset) {
	    /* read older xref and trailer */
	    code = pdf_scan_read_xref(ps, prev);
	}
    }

    return code;
}

/* Read the trailer dictionary that follows the "trailer" keyword.
 * Tokens are processed until "startxref" is reached.  At that point
 * /Root is stored in ps->root if no root is known yet, and /Prev,
 * if present, is stored in *prev.
 * Returns 0 on success, non-zero on failure.
 */
static int
pdf_scan_read_trailer(PDFSCAN *ps, unsigned long *prev)
{
    ref entry;
    int status = pdf_scan_next_token(ps);

    /* the dictionary must open with a mark */
    if ((status == 0) && (ps->token_type != marktype))
	status = -1;
    push_token(ps);

    while (status == 0) {
	status = pdf_scan_next_token(ps);
	if (status != 0)
	    break;

	if (!is_optoken(ps, "startxref")) {
	    /* part of the dictionary: operators are executed,
	     * anything else is an operand
	     */
	    if (process_op(ps) != 0)
		push_token(ps);
	    continue;
	}

	/* end of trailer: the dictionary is now on top of the stack */
	if (ps->root == 0) {
	    entry = dict_get(ps, "Root");
	    if (entry.type != objtype) {
		pdf_scan_msgf(ps, 
		    "trailer /Root requires indirect reference\n");
		status = -1;
	    }
	    else {
		ps->root = entry.value.objval;
	    }
	}
	entry = dict_get(ps, "Prev");
	if (entry.type == integertype) {
	    *prev = entry.value.intval;
	}
	else if (entry.type != invalidtype) {
	    status = -1;
	    pdf_scan_msgf(ps, "trailer /Prev requires integer\n");
	}
	break;
    }

    if (status != 0)
	pdf_scan_msgf(ps, "Error reading trailer\n");
    return status;
}


/* Seek to an object using the xref table and read its header.
 * The header is three tokens: an integer object number, an integer 
 * generation and the "obj" operator.
 * objnum must be in the range 1 .. ps->xref_len-1 and be marked
 * as used in the xref table.
 * Returns 0 on success, non-zero on failure.
 */
static int pdf_scan_read_object_start(PDFSCAN *ps, int objnum)
{
    int code = 0;
    int value = 0;
    if (objnum == 0) {
	pdf_scan_msgf(ps, "Object 0 is always unused\n");
	return -1;
    }
    /* Object numbers come from the file, so reject negative values
     * as well as ones past the end before indexing ps->xref[].
     */
    if ((objnum < 0) || (objnum >= ps->xref_len)) {
	pdf_scan_msgf(ps, "Object reference %d doesn't exist.  There are only %d objects\n", objnum, ps->xref_len);
	return -1;
    }
    if (!ps->xref[objnum].used) {
	pdf_scan_msgf(ps, "Object %d is unused\n", objnum);
	return -1;
    }
    pdf_scan_seek(ps, ps->xref[objnum].offset, PDFSEEK_SET);

    code = pdf_scan_next_token(ps);		/* object number */
    if (code == 0)
	code = type_check(ps, integertype);
    if (code == 0) {
	value = atoi(ps->buf+ps->begin);	/* object number */
	code = pdf_scan_next_token(ps); 	/* generation */
    }
    if (code == 0)
	code = type_check(ps, integertype);
    if (code == 0)
	code = pdf_scan_next_token(ps);   	/* obj */
    if (code == 0)
	code = op_check(ps, "obj");

    /* The number in the file must match the one we were asked for.
     * value is still 0 if the first token could not be read.
     */
    if (value != objnum) {
	pdf_scan_msgf(ps, "Didn't find object %d\n", objnum);
	return -1;
    }
    return code;
}

/*****************************************************************/

/* Read an object, and leave it on the stack.
 * If obj_find already knows the object, that ref is pushed and 
 * nothing is read.  Otherwise the object is scanned up to "endobj" 
 * and the top of stack is recorded with obj_add.
 * Returns 0 on success, non-zero on failure.
 */
static int
pdf_scan_read_object(PDFSCAN *ps, int objnum)
{
    int status;
    ref cached = obj_find(ps, objnum);

    if (cached.type != invalidtype) {
        /* found in cache */
        push_stack(ps, cached);
        return 0;
    }

    if (pdf_scan_read_object_start(ps, objnum) != 0) {
        pdf_scan_msgf(ps, "Didn't find object %d\n", objnum);
        return -1;
    }

    /* the first token of the body must be a mark */
    status = pdf_scan_next_token(ps);
    if ((status == 0) && (ps->token_type != marktype))
        status = -1;
    push_token(ps);

    while (status == 0) {
        status = pdf_scan_next_token(ps);
        if (status == 0) {
            if (is_optoken(ps, "endobj")) {
                /* remember the finished object for later lookups */
                obj_add(ps, objnum, top_stack(ps));
                break;
            }
            /* tokens that process_op rejects (non-zero) go on the stack */
            if (process_op(ps) != 0)
                push_token(ps);
        }
    }
    return status;
}

/*****************************************************************/

/* Find the object number for a page.
 * pagenum is zero based: the first page is 0.
 * Returns the object number of the page (> 0), or -1 on failure.
 *
 * The page tree is walked downwards from ps->pages.
 * count_base is the number of pages that precede the kid being 
 * examined.  It grows by one for every /Page passed over and by 
 * /Count for every /Pages subtree skipped, so a /Kids array may 
 * mix /Page and /Pages entries.
 */
static int pdf_scan_find_page(PDFSCAN *ps, int pagenum)
{
    int code;
    ref kids;
    ref r;
    int pageobj = 0;
    int count_base = 0;
    int count;
    int depth = 0;
    ref *pref;
    int i;
    int inext;

    if (pagenum < 0) {
	pdf_scan_msgf(ps, "Failed to find page %d\n", pagenum+1);
	return -1;
    }
    if (pagenum >= ps->page_count) {
	pdf_scan_msgf(ps, "Not that many pages\n");
	return -1;
    }
    code = pdf_scan_read_object(ps, ps->pages);
    if (code) {
	pdf_scan_msgf(ps, "Didn't find Pages object\n");
	return -1;
    }
    /* iterate through Kids, looking for the one that includes this page */ 
    kids = dict_get(ps, "Kids");
    if (kids.type != arraytype) {
	pdf_scan_msgf(ps, "/Pages object %d must contain /Kids array\n",
	    ps->pages);
	return -1;
    }
    pop_stack(ps);	/* First Pages */
    for (i = 0; (i < kids.rsize) && (code == 0); i=inext) {
	inext = i+1;
	pref = &kids.value.arrayval[i];
	if (pref->type != objtype) {
	    /* Nothing would be read for this kid, so don't go on
	     * to inspect or pop whatever happens to be on the stack.
	     */
	    pdf_scan_msgf(ps, 
		"/Kids entries must be indirect references\n");
	    code = -1;
	    break;
	}
	code = pdf_scan_read_object(ps, pref->value.objval);
	if (code == 0) {
	    r = dict_get(ps, "Type"); 
	    if (nameref_equals(&r, "Page")) {
		if (count_base == pagenum) {
		    /* this is it */
		    pageobj = pref->value.objval;
		    pop_stack(ps);	/* the wanted page */
		    break;
		}
		count_base++;	/* one more page precedes the next kid */
	    }
	    else if (nameref_equals(&r, "Pages")) {
	        r = dict_get(ps, "Count"); 
		if (r.type == integertype) {
		    count = r.value.intval;
		    if (pagenum < count_base + count) {
			/* It's under this child */
			inext = 0;
		        pop_stack(ps);	/* The old /Pages */
			/* A tree without loops can't be deeper than
			 * the number of objects in the file.
			 */
			if (++depth > ps->xref_len) {
			    pdf_scan_msgf(ps, 
				"Page tree is too deep or contains a loop\n");
			    code = -1;
			}
			else
			    code = pdf_scan_read_object(ps, 
				pref->value.objval);
			if (code == 0) {
			    kids = dict_get(ps, "Kids");
			    if (kids.type != arraytype) {
				pdf_scan_msgf(ps, 
				"/Pages object %d must contain /Kids array\n",
				    pref->value.objval);
				code = -1;
			    }
			}
		    }
		    else {
			count_base += count;
		    }
		}
		else {
		    pdf_scan_msgf(ps, "/Pages /Count must be integer\n");
		    code = -1;
		}
	    }
	    else {
		pdf_scan_msgf(ps, 
		    "pdf_scan_find_page: object %d isn't Pages or Page\n", 
		    pref->value.objval);
		code = -1;
	    }
	    pop_stack(ps);
	}
    }

    if (pageobj <= 0) {
	pdf_scan_msgf(ps, "Failed to find page %d\n", pagenum+1);
	code = -1;
    }

    if (code)
	return -1;

    /* Don't clean up, since we will use the cached objects
     * when extracting the page media.
     */

    return pageobj;
}


/* Read the object ps->pages, check that it is /Type /Pages and 
 * store its integer /Count in ps->page_count.
 * Returns 0 on success, -1 on failure.
 */
static int
pdf_scan_read_page_count(PDFSCAN *ps)
{
    ref entry;

    if (pdf_scan_read_object(ps, ps->pages) != 0) {
        pdf_scan_msgf(ps, "Didn't find Pages object\n");
        return -1;
    }

    entry = dict_get(ps, "Type");
    if (!nameref_equals(&entry, "Pages")) {
        pdf_scan_msgf(ps, "Pages object didn't have /Type /Pages\n");
        return -1;
    }

    entry = dict_get(ps, "Count");
    if (entry.type != integertype) {
        pdf_scan_msgf(ps, "Pages object didn't integer /Count\n");
        return -1;
    }

    ps->page_count = entry.value.intval;
    return 0;
}

/* Store the numeric value of r in *f.
 * Integers are converted to float.
 * Returns 0 on success, or -1 (leaving *f alone) if r is 
 * neither a real nor an integer.
 */
static int convert_float(ref r, float *f)
{
    if (r.type == integertype) {
        *f = (float)r.value.intval;
        return 0;
    }
    if (r.type == realtype) {
        *f = r.value.realval;
        return 0;
    }
    return -1;
}

/* Fill box from a four element array of numbers, 
 * in the order llx, lly, urx, ury.
 * Returns 0 on success, non-zero if array isn't an array of 
 * exactly four elements or an element isn't numeric.
 * Conversion stops at the first bad element, so box may have 
 * been partly written on failure.
 */
static int
pdf_scan_read_bbox(PDFBBOX *box, ref array)
{
    float *field[4];
    int status = 0;
    int k;

    if ((array.type != arraytype) || (array.rsize != 4))
        return -1;

    field[0] = &box->llx;
    field[1] = &box->lly;
    field[2] = &box->urx;
    field[3] = &box->ury;
    for (k = 0; (k < 4) && (status == 0); k++)
        status = convert_float(array.value.arrayval[k], field[k]);
    return status;
}

/* Read catalog and leave on stack.
 * The object ps->root must be /Type /Catalog and its /Pages entry 
 * must be an indirect reference, whose object number is stored in 
 * ps->pages.
 * Returns 0 on success, -1 on failure.
 */
static int
pdf_scan_read_catalog(PDFSCAN *ps)
{
    int code;
    ref p;
    /* Read root object, making sure it is /Type /Catalog,
     * and that /Pages is an indirect reference
     */
    code = pdf_scan_read_object(ps, ps->root);
    if (code) {
	pdf_scan_msgf(ps, "Didn't find Root object\n");
	return -1;
    }

    p = dict_get(ps, "Type");
    if (!nameref_equals(&p, "Catalog")) {
	pdf_scan_msgf(ps, "Root object didn't have /Type /Catalog\n");
	return -1;
    }
    p = dict_get(ps, "Pages");
    if (p.type != objtype) {
	pdf_scan_msgf(ps, "Root object didn't indirect reference to /Pages\n");
	return -1;
    }
    /* p is an object reference, so use the objval member, 
     * as is done for the trailer /Root.
     */
    ps->pages = p.value.objval;
    return 0;
}

/*****************************************************************/
/* public functions */


/* Close a scanner returned by pdf_scan_open.
 * Calls pdf_scan_cleanup and pdf_scan_finish, then frees ps.
 * ps must not be used afterwards.
 * Like the other public functions, a NULL ps is accepted 
 * and nothing is done.
 */
void
pdf_scan_close(PDFSCAN *ps)
{
    if (ps == NULL)
	return;
    pdf_scan_cleanup(ps);
    pdf_scan_finish(ps);
    free(ps);
}


/* Open a PDF file and create a scanner for it.
 * handle and fn are stored in the scanner as its handle and print_fn.
 * The xref table, the catalog, the page count and the media of 
 * the first page are all read before returning.
 * Returns the new scanner, or NULL if memory ran out or any of 
 * those steps failed.  Release the scanner with pdf_scan_close.
 */
PDFSCAN *
pdf_scan_open(const TCHAR *filename, void *handle,
    int (*fn)(void *handle, const char *ptr, int len))
{
    PDFBBOX first_media, first_crop;
    int first_rotate;
    int status;
    int failed;
    PDFSCAN *scanner = (PDFSCAN *)malloc(sizeof(PDFSCAN));

    if (scanner == NULL)
        return NULL;
    memset(scanner, 0, sizeof(PDFSCAN));
    scanner->handle = handle;
    scanner->print_fn = fn;

    status = pdf_scan_init(scanner, filename);
    switch (status) {
        case 0:
            break;
        case -1:
            pdf_scan_msgf(scanner, "Couldn't open PDF file\n");
            break;
        default:
            pdf_scan_msgf(scanner, "Error initialising PDF scanner\n");
            break;
    }

    /* Each step is only attempted if every earlier one succeeded. */
    failed = (status != 0)
        || (pdf_scan_find_xref(scanner) != 0)
        || (pdf_scan_read_xref(scanner, scanner->xref_offset) != 0)
        || (pdf_scan_read_catalog(scanner) != 0)
        || (pdf_scan_read_page_count(scanner) != 0)
        || (pdf_scan_page_media(scanner, 0, &first_rotate, 
                &first_media, &first_crop) != 0);

    pdf_scan_cleanup(scanner);
    if (failed) {
        pdf_scan_close(scanner);
        scanner = NULL;
    }
    return scanner;
}

/* Return the page count stored in the scanner, 
 * or 0 if ps is NULL.
 */
int
pdf_scan_page_count(PDFSCAN *ps) 
{
    return (ps != NULL) ? ps->page_count : 0;
}

/* Get the /Rotate, /MediaBox and /CropBox that apply to a page.
 * pagenum is zero based.
 * Each value is taken from the page object if present there, 
 * otherwise from the nearest object up the /Parent chain that has it.
 * A missing /Rotate becomes 0 and a missing /CropBox becomes 
 * the /MediaBox.  A /MediaBox must be found.
 * The result is remembered in ps, so asking again for the same 
 * page does not read the file.
 * Returns 0 on success, -1 on failure.  On failure the outputs 
 * may have been partly written and should not be used.
 */
int
pdf_scan_page_media(PDFSCAN *ps, int pagenum, int *rotate,
    PDFBBOX *mediabox, PDFBBOX *cropbox)
{
    BOOL found_rotate = FALSE;
    BOOL found_mediabox = FALSE;
    BOOL found_cropbox = FALSE;
    BOOL has_parent = TRUE;
    ref p, objref;
    int objnum;
    int depth = 0;

    if (ps == NULL)
	return -1;

    /* Negative page numbers are never valid.  Reject them before the
     * cache test so they can't match a cache that hasn't been filled.
     */
    if (pagenum < 0)
	return -1;

    if (pagenum == ps->pagenum) {
	/* Used cached values */
	*rotate = ps->rotate;
	*mediabox = ps->mediabox;
	*cropbox = ps->cropbox;
	return 0;
    }

    if (ps->file == NULL) {
	if (pdf_scan_open_file(ps) != 0) 
	    return -1;
    }
    objnum = pdf_scan_find_page(ps, pagenum);
    if (objnum <= 0) {
	pdf_scan_cleanup(ps);
	return -1;
    }
    if (pdf_scan_read_object(ps, objnum) < 0) {
	pdf_scan_cleanup(ps);
	return -1;
    }

    while (has_parent) {
	if (!found_rotate) {
	    p = dict_get(ps, "Rotate");
	    if (p.type == integertype) {
		*rotate = p.value.intval;
		found_rotate = TRUE;
	    }
	}
	if (!found_mediabox) {
	    p = dict_get(ps, "MediaBox");
	    if (pdf_scan_read_bbox(mediabox, p) == 0)
		found_mediabox = TRUE;
	}
	if (!found_cropbox) {
	    p = dict_get(ps, "CropBox");
	    if (pdf_scan_read_bbox(cropbox, p) == 0)
		found_cropbox = TRUE;
	}
        if (found_rotate && found_mediabox && found_cropbox)
	    break;

	/* Move up to the parent.  A chain without loops can't be
	 * longer than the number of objects, so stop if it is.
	 */
	p = dict_get(ps, "Parent");
	if ((p.type == objtype) && (++depth <= ps->xref_len)) {
	    objref = pop_stack(ps);
	    if (pdf_scan_read_object(ps, p.value.objval) < 0) {
		push_stack(ps, objref);
		has_parent = FALSE;
	    }
	}
	else
	    has_parent = FALSE;
    }
    pop_stack(ps);
    /* Only default the crop box from a media box that was really read */
    if (!found_cropbox && found_mediabox) {
	*cropbox = *mediabox;
	found_cropbox = TRUE;
    }
    if (!found_rotate) {
	*rotate = 0;
	found_rotate = TRUE;
    }

    pdf_scan_cleanup(ps);
    
    if (found_rotate && found_mediabox && found_cropbox) {
	/* cache these values */
	ps->pagenum = pagenum;
	ps->rotate = *rotate;
	ps->mediabox = *mediabox;
	ps->cropbox = *cropbox;
        return 0;
    }
    
    return -1;
}

/*****************************************************************/

#ifdef DEMO_PDFSCAN

/* Message callback for the demo: write len bytes from ptr to stdout.
 * handle is not used.  Always returns len.
 */
int test_print_fn(void *handle, const char *ptr, int len)
{
    (void)handle;
    fwrite(ptr, sizeof(char), len, stdout);
    return len;
}

/* Demo program: print the page count of the PDF file named by 
 * argv[1], then the /Rotate, /MediaBox and /CropBox of each page.
 * Returns 0 if the file could be scanned, or 1 if no filename 
 * was given or pdf_scan_open failed.
 */
int main(int argc, char *argv[])
{
    PDFSCAN *ps;
    int i, count;
    int code;
    PDFBBOX mediabox, cropbox;
    int rotate;

    if (argc < 2) {
	fprintf(stdout, "Usage: cpdfscan filename\n");
	return 1;
    }

    ps = pdf_scan_open(argv[1], NULL, test_print_fn);
    if (ps == NULL)
	return 1;	/* don't report success for a file we couldn't scan */

    count = pdf_scan_page_count(ps);
    pdf_scan_msgf(ps, "Page count is %d\n", count);
    for (i=0; i<count; i++) {
	code = pdf_scan_page_media(ps, i, &rotate, &mediabox, &cropbox);
	if (code == 0) {
	    fprintf(stdout, "Page %d /Rotate %d ", i+1, rotate);
	    fprintf(stdout, "/MediaBox [%g %g %g %g] /CropBox [%g %g %g %g]\n", 
		mediabox.llx, mediabox.lly, mediabox.urx, mediabox.ury,
		cropbox.llx, cropbox.lly, cropbox.urx, cropbox.ury);
	}
	else
	    fprintf(stdout, "Page %d media unknown\n", i+1);
    }
    pdf_scan_close(ps);
    return 0;
}

#endif