PageRenderTime 30ms CodeModel.GetById 15ms app.highlight 10ms RepoModel.GetById 2ms app.codeStats 0ms

/framework/vendor/zend/Zend/Pdf/Cmap/SegmentToDelta.php

http://zoop.googlecode.com/
PHP | 407 lines | 164 code | 59 blank | 184 comment | 42 complexity | 9bc47b603b15eb3205910f09ea8b3c14 MD5 | raw file
  1<?php
  2/**
  3 * Zend Framework
  4 *
  5 * LICENSE
  6 *
  7 * This source file is subject to the new BSD license that is bundled
  8 * with this package in the file LICENSE.txt.
  9 * It is also available through the world-wide-web at this URL:
 10 * http://framework.zend.com/license/new-bsd
 11 * If you did not receive a copy of the license and are unable to
 12 * obtain it through the world-wide-web, please send an email
 13 * to license@zend.com so we can send you a copy immediately.
 14 *
 15 * @category   Zend
 16 * @package    Zend_Pdf
 17 * @subpackage Fonts
 18 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 19 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 20 * @version    $Id: SegmentToDelta.php 20096 2010-01-06 02:05:09Z bkarwin $
 21 */
 22
 23/** Zend_Pdf_Cmap */
 24require_once 'Zend/Pdf/Cmap.php';
 25
 26
 27/**
 28 * Implements the "segment mapping to delta values" character map (type 4).
 29 *
 30 * This is the Microsoft standard mapping table type for OpenType fonts. It
 31 * provides the ability to cover multiple contiguous ranges of the Unicode
 32 * character set, with the exception of Unicode Surrogates (U+D800 - U+DFFF).
 33 *
 34 * @package    Zend_Pdf
 35 * @subpackage Fonts
 36 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 37 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 38 */
 39class Zend_Pdf_Cmap_SegmentToDelta extends Zend_Pdf_Cmap
 40{
 41  /**** Instance Variables ****/
 42
 43
 44    /**
 45     * The number of segments in the table.
 46     * @var integer
 47     */
 48    protected $_segmentCount = 0;
 49
 50    /**
 51     * The size of the binary search range for segments.
 52     * @var integer
 53     */
 54    protected $_searchRange = 0;
 55
 56    /**
 57     * The number of binary search steps required to cover the entire search
 58     * range.
 59     * @var integer
 60     */
 61    protected $_searchIterations = 0;
 62
 63    /**
 64     * Array of ending character codes for each segment.
 65     * @var array
 66     */
 67    protected $_segmentTableEndCodes = array();
 68
 69    /**
 70     * The ending character code for the segment at the end of the low search
 71     * range.
 72     * @var integer
 73     */
 74    protected $_searchRangeEndCode = 0;
 75
 76    /**
 77     * Array of starting character codes for each segment.
 78     * @var array
 79     */
 80    protected $_segmentTableStartCodes = array();
 81
 82    /**
 83     * Array of character code to glyph delta values for each segment.
 84     * @var array
 85     */
 86    protected $_segmentTableIdDeltas = array();
 87
 88    /**
 89     * Array of offsets into the glyph index array for each segment.
 90     * @var array
 91     */
 92    protected $_segmentTableIdRangeOffsets = array();
 93
 94    /**
 95     * Glyph index array. Stores glyph numbers, used with range offset.
 96     * @var array
 97     */
 98    protected $_glyphIndexArray = array();
 99
100
101
102  /**** Public Interface ****/
103
104
105  /* Concrete Class Implementation */
106
107    /**
108     * Returns an array of glyph numbers corresponding to the Unicode characters.
109     *
110     * If a particular character doesn't exist in this font, the special 'missing
111     * character glyph' will be substituted.
112     *
113     * See also {@link glyphNumberForCharacter()}.
114     *
115     * @param array $characterCodes Array of Unicode character codes (code points).
116     * @return array Array of glyph numbers.
117     */
118    public function glyphNumbersForCharacters($characterCodes)
119    {
120        $glyphNumbers = array();
121        foreach ($characterCodes as $key => $characterCode) {
122
123            /* These tables only cover the 16-bit character range.
124             */
125            if ($characterCode > 0xffff) {
126                $glyphNumbers[$key] = Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH;
127                continue;
128            }
129
130            /* Determine where to start the binary search. The segments are
131             * ordered from lowest-to-highest. We are looking for the first
132             * segment whose end code is greater than or equal to our character
133             * code.
134             *
135             * If the end code at the top of the search range is larger, then
136             * our target is probably below it.
137             *
138             * If it is smaller, our target is probably above it, so move the
139             * search range to the end of the segment list.
140             */
141            if ($this->_searchRangeEndCode >= $characterCode) {
142                $searchIndex = $this->_searchRange;
143            } else {
144                $searchIndex = $this->_segmentCount;
145            }
146
147            /* Now do a binary search to find the first segment whose end code
148             * is greater or equal to our character code. No matter the number
149             * of segments (there may be hundreds in a large font), we will only
150             * need to perform $this->_searchIterations.
151             */
152            for ($i = 1; $i <= $this->_searchIterations; $i++) {
153                if ($this->_segmentTableEndCodes[$searchIndex] >= $characterCode) {
154                    $subtableIndex = $searchIndex;
155                    $searchIndex -= $this->_searchRange >> $i;
156                } else {
157                    $searchIndex += $this->_searchRange >> $i;
158                }
159            }
160
161            /* If the segment's start code is greater than our character code,
162             * that character is not represented in this font. Move on.
163             */
164            if ($this->_segmentTableStartCodes[$subtableIndex] > $characterCode) {
165                $glyphNumbers[$key] = Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH;
166                continue;
167            }
168
169            if ($this->_segmentTableIdRangeOffsets[$subtableIndex] == 0) {
170                /* This segment uses a simple mapping from character code to
171                 * glyph number.
172                 */
173                $glyphNumbers[$key] = ($characterCode + $this->_segmentTableIdDeltas[$subtableIndex]) % 65536;
174
175            } else {
176                /* This segment relies on the glyph index array to determine the
177                 * glyph number. The calculation below determines the correct
178                 * index into that array. It's a little odd because the range
179                 * offset in the font file is designed to quickly provide an
180                 * address of the index in the raw binary data instead of the
181                 * index itself. Since we've parsed the data into arrays, we
182                 * must process it a bit differently.
183                 */
184                $glyphIndex = ($characterCode - $this->_segmentTableStartCodes[$subtableIndex] +
185                               $this->_segmentTableIdRangeOffsets[$subtableIndex] - $this->_segmentCount +
186                               $subtableIndex - 1);
187                $glyphNumbers[$key] = $this->_glyphIndexArray[$glyphIndex];
188
189            }
190
191        }
192        return $glyphNumbers;
193    }
194
195    /**
196     * Returns the glyph number corresponding to the Unicode character.
197     *
198     * If a particular character doesn't exist in this font, the special 'missing
199     * character glyph' will be substituted.
200     *
201     * See also {@link glyphNumbersForCharacters()} which is optimized for bulk
202     * operations.
203     *
204     * @param integer $characterCode Unicode character code (code point).
205     * @return integer Glyph number.
206     */
207    public function glyphNumberForCharacter($characterCode)
208    {
209        /* This code is pretty much a copy of glyphNumbersForCharacters().
210         * See that method for inline documentation.
211         */
212
213        if ($characterCode > 0xffff) {
214            return Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH;
215        }
216
217        if ($this->_searchRangeEndCode >= $characterCode) {
218            $searchIndex = $this->_searchRange;
219        } else {
220            $searchIndex = $this->_segmentCount;
221        }
222
223        for ($i = 1; $i <= $this->_searchIterations; $i++) {
224            if ($this->_segmentTableEndCodes[$searchIndex] >= $characterCode) {
225                $subtableIndex = $searchIndex;
226                $searchIndex -= $this->_searchRange >> $i;
227            } else {
228                $searchIndex += $this->_searchRange >> $i;
229            }
230        }
231
232        if ($this->_segmentTableStartCodes[$subtableIndex] > $characterCode) {
233            return Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH;
234        }
235
236        if ($this->_segmentTableIdRangeOffsets[$subtableIndex] == 0) {
237            $glyphNumber = ($characterCode + $this->_segmentTableIdDeltas[$subtableIndex]) % 65536;
238        } else {
239            $glyphIndex = ($characterCode - $this->_segmentTableStartCodes[$subtableIndex] +
240                           $this->_segmentTableIdRangeOffsets[$subtableIndex] - $this->_segmentCount +
241                           $subtableIndex - 1);
242            $glyphNumber = $this->_glyphIndexArray[$glyphIndex];
243        }
244        return $glyphNumber;
245    }
246
247    /**
248     * Returns an array containing the Unicode characters that have entries in
249     * this character map.
250     *
251     * @return array Unicode character codes.
252     */
253    public function getCoveredCharacters()
254    {
255        $characterCodes = array();
256        for ($i = 1; $i <= $this->_segmentCount; $i++) {
257            for ($code = $this->_segmentTableStartCodes[$i]; $code <= $this->_segmentTableEndCodes[$i]; $code++) {
258                $characterCodes[] = $code;
259            }
260        }
261        return $characterCodes;
262    }
263
264
265    /**
266     * Returns an array containing the glyphs numbers that have entries in this character map.
267     * Keys are Unicode character codes (integers)
268     *
269     * This functionality is partially covered by glyphNumbersForCharacters(getCoveredCharacters())
270     * call, but this method do it in more effective way (prepare complete list instead of searching
271     * glyph for each character code).
272     *
273     * @internal
274     * @return array Array representing <Unicode character code> => <glyph number> pairs.
275     */
276    public function getCoveredCharactersGlyphs()
277    {
278        $glyphNumbers = array();
279
280        for ($segmentNum = 1; $segmentNum <= $this->_segmentCount; $segmentNum++) {
281            if ($this->_segmentTableIdRangeOffsets[$segmentNum] == 0) {
282                $delta = $this->_segmentTableIdDeltas[$segmentNum];
283
284                for ($code =  $this->_segmentTableStartCodes[$segmentNum];
285                     $code <= $this->_segmentTableEndCodes[$segmentNum];
286                     $code++) {
287                    $glyphNumbers[$code] = ($code + $delta) % 65536;
288                }
289            } else {
290                $code       = $this->_segmentTableStartCodes[$segmentNum];
291                $glyphIndex = $this->_segmentTableIdRangeOffsets[$segmentNum] - ($this->_segmentCount - $segmentNum) - 1;
292
293                while ($code <= $this->_segmentTableEndCodes[$segmentNum]) {
294                    $glyphNumbers[$code] = $this->_glyphIndexArray[$glyphIndex];
295
296                    $code++;
297                    $glyphIndex++;
298                }
299            }
300        }
301
302        return $glyphNumbers;
303    }
304
305
306
307  /* Object Lifecycle */
308
309    /**
310     * Object constructor
311     *
312     * Parses the raw binary table data. Throws an exception if the table is
313     * malformed.
314     *
315     * @param string $cmapData Raw binary cmap table data.
316     * @throws Zend_Pdf_Exception
317     */
318    public function __construct($cmapData)
319    {
320        /* Sanity check: The table should be at least 23 bytes in size.
321         */
322        $actualLength = strlen($cmapData);
323        if ($actualLength < 23) {
324            require_once 'Zend/Pdf/Exception.php';
325            throw new Zend_Pdf_Exception('Insufficient table data',
326                                         Zend_Pdf_Exception::CMAP_TABLE_DATA_TOO_SMALL);
327        }
328
329        /* Sanity check: Make sure this is right data for this table type.
330         */
331        $type = $this->_extractUInt2($cmapData, 0);
332        if ($type != Zend_Pdf_Cmap::TYPE_SEGMENT_TO_DELTA) {
333            require_once 'Zend/Pdf/Exception.php';
334            throw new Zend_Pdf_Exception('Wrong cmap table type',
335                                         Zend_Pdf_Exception::CMAP_WRONG_TABLE_TYPE);
336        }
337
338        $length = $this->_extractUInt2($cmapData, 2);
339        if ($length != $actualLength) {
340            require_once 'Zend/Pdf/Exception.php';
341            throw new Zend_Pdf_Exception("Table length ($length) does not match actual length ($actualLength)",
342                                         Zend_Pdf_Exception::CMAP_WRONG_TABLE_LENGTH);
343        }
344
345        /* Mapping tables should be language-independent. The font may not work
346         * as expected if they are not. Unfortunately, many font files in the
347         * wild incorrectly record a language ID in this field, so we can't
348         * call this a failure.
349         */
350        $language = $this->_extractUInt2($cmapData, 4);
351        if ($language != 0) {
352            // Record a warning here somehow?
353        }
354
355        /* These two values are stored premultiplied by two which is convienent
356         * when using the binary data directly, but we're parsing it out to
357         * native PHP data types, so divide by two.
358         */
359        $this->_segmentCount = $this->_extractUInt2($cmapData, 6) >> 1;
360        $this->_searchRange  = $this->_extractUInt2($cmapData, 8) >> 1;
361
362        $this->_searchIterations = $this->_extractUInt2($cmapData, 10) + 1;
363
364        $offset = 14;
365        for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
366            $this->_segmentTableEndCodes[$i] = $this->_extractUInt2($cmapData, $offset);
367        }
368
369        $this->_searchRangeEndCode = $this->_segmentTableEndCodes[$this->_searchRange];
370
371        $offset += 2;    // reserved bytes
372
373        for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
374            $this->_segmentTableStartCodes[$i] = $this->_extractUInt2($cmapData, $offset);
375        }
376
377        for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
378            $this->_segmentTableIdDeltas[$i] = $this->_extractInt2($cmapData, $offset);    // signed
379        }
380
381        /* The range offset helps determine the index into the glyph index array.
382         * Like the segment count and search range above, it's stored as a byte
383         * multiple in the font, so divide by two as we extract the values.
384         */
385        for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
386            $this->_segmentTableIdRangeOffsets[$i] = $this->_extractUInt2($cmapData, $offset) >> 1;
387        }
388
389        /* The size of the glyph index array varies by font and depends on the
390         * extent of the usage of range offsets versus deltas. Some fonts may
391         * not have any entries in this array.
392         */
393        for (; $offset < $length; $offset += 2) {
394            $this->_glyphIndexArray[] = $this->_extractUInt2($cmapData, $offset);
395        }
396
397        /* Sanity check: After reading all of the data, we should be at the end
398         * of the table.
399         */
400        if ($offset != $length) {
401            require_once 'Zend/Pdf/Exception.php';
402            throw new Zend_Pdf_Exception("Ending offset ($offset) does not match length ($length)",
403                                         Zend_Pdf_Exception::CMAP_FINAL_OFFSET_NOT_LENGTH);
404        }
405    }
406
407}