PageRenderTime 47ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/Lib/site-packages/matplotlib/type1font.py

https://gitlab.com/pierreEffiScience/ImageLabeling
Python | 333 lines | 315 code | 2 blank | 16 comment | 16 complexity | 6bf49bb01372465b929cc8a4e05b87d7 MD5 | raw file
  1. """
  2. This module contains a class representing a Type 1 font.
  3. This version reads pfa and pfb files and splits them for embedding in
  4. pdf files. It also supports SlantFont and ExtendFont transformations,
  5. similarly to pdfTeX and friends. There is no support yet for
  6. subsetting.
  7. Usage::
  8. >>> font = Type1Font(filename)
  9. >>> clear_part, encrypted_part, finale = font.parts
  10. >>> slanted_font = font.transform({'slant': 0.167})
  11. >>> extended_font = font.transform({'extend': 1.2})
  12. Sources:
  13. * Adobe Technical Note #5040, Supporting Downloadable PostScript
  14. Language Fonts.
  15. * Adobe Type 1 Font Format, Adobe Systems Incorporated, third printing,
  16. v1.1, 1993. ISBN 0-201-57044-0.
  17. """
  18. from __future__ import (absolute_import, division, print_function,
  19. unicode_literals)
  20. from matplotlib.externals import six
  21. from matplotlib.externals.six import unichr
  22. import binascii
  23. import io
  24. import itertools
  25. import numpy as np
  26. import re
  27. import struct
  28. import sys
  29. if six.PY3:
  30. def ord(x):
  31. return x
  32. class Type1Font(object):
  33. """
  34. A class representing a Type-1 font, for use by backends.
  35. .. attribute:: parts
  36. A 3-tuple of the cleartext part, the encrypted part, and the
  37. finale of zeros.
  38. .. attribute:: prop
  39. A dictionary of font properties.
  40. """
  41. __slots__ = ('parts', 'prop')
  42. def __init__(self, input):
  43. """
  44. Initialize a Type-1 font. *input* can be either the file name of
  45. a pfb file or a 3-tuple of already-decoded Type-1 font parts.
  46. """
  47. if isinstance(input, tuple) and len(input) == 3:
  48. self.parts = input
  49. else:
  50. with open(input, 'rb') as file:
  51. data = self._read(file)
  52. self.parts = self._split(data)
  53. self._parse()
  54. def _read(self, file):
  55. """
  56. Read the font from a file, decoding into usable parts.
  57. """
  58. rawdata = file.read()
  59. if not rawdata.startswith(b'\x80'):
  60. return rawdata
  61. data = b''
  62. while len(rawdata) > 0:
  63. if not rawdata.startswith(b'\x80'):
  64. raise RuntimeError('Broken pfb file (expected byte 128, '
  65. 'got %d)' % ord(rawdata[0]))
  66. type = ord(rawdata[1])
  67. if type in (1, 2):
  68. length, = struct.unpack(str('<i'), rawdata[2:6])
  69. segment = rawdata[6:6 + length]
  70. rawdata = rawdata[6 + length:]
  71. if type == 1: # ASCII text: include verbatim
  72. data += segment
  73. elif type == 2: # binary data: encode in hexadecimal
  74. data += binascii.hexlify(segment)
  75. elif type == 3: # end of file
  76. break
  77. else:
  78. raise RuntimeError('Unknown segment type %d in pfb file' %
  79. type)
  80. return data
  81. def _split(self, data):
  82. """
  83. Split the Type 1 font into its three main parts.
  84. The three parts are: (1) the cleartext part, which ends in a
  85. eexec operator; (2) the encrypted part; (3) the fixed part,
  86. which contains 512 ASCII zeros possibly divided on various
  87. lines, a cleartomark operator, and possibly something else.
  88. """
  89. # Cleartext part: just find the eexec and skip whitespace
  90. idx = data.index(b'eexec')
  91. idx += len(b'eexec')
  92. while data[idx] in b' \t\r\n':
  93. idx += 1
  94. len1 = idx
  95. # Encrypted part: find the cleartomark operator and count
  96. # zeros backward
  97. idx = data.rindex(b'cleartomark') - 1
  98. zeros = 512
  99. while zeros and data[idx] in b'0' or data[idx] in b'\r\n':
  100. if data[idx] in b'0':
  101. zeros -= 1
  102. idx -= 1
  103. if zeros:
  104. raise RuntimeError('Insufficiently many zeros in Type 1 font')
  105. # Convert encrypted part to binary (if we read a pfb file, we
  106. # may end up converting binary to hexadecimal to binary again;
  107. # but if we read a pfa file, this part is already in hex, and
  108. # I am not quite sure if even the pfb format guarantees that
  109. # it will be in binary).
  110. binary = binascii.unhexlify(data[len1:idx+1])
  111. return data[:len1], binary, data[idx+1:]
  112. _whitespace_re = re.compile(br'[\0\t\r\014\n ]+')
  113. _token_re = re.compile(br'/{0,2}[^]\0\t\r\v\n ()<>{}/%[]+')
  114. _comment_re = re.compile(br'%[^\r\n\v]*')
  115. _instring_re = re.compile(br'[()\\]')
  116. # token types, compared via object identity (poor man's enum)
  117. _whitespace = object()
  118. _name = object()
  119. _string = object()
  120. _delimiter = object()
  121. _number = object()
  122. @classmethod
  123. def _tokens(cls, text):
  124. """
  125. A PostScript tokenizer. Yield (token, value) pairs such as
  126. (cls._whitespace, ' ') or (cls._name, '/Foobar').
  127. """
  128. pos = 0
  129. while pos < len(text):
  130. match = (cls._comment_re.match(text[pos:]) or
  131. cls._whitespace_re.match(text[pos:]))
  132. if match:
  133. yield (cls._whitespace, match.group())
  134. pos += match.end()
  135. elif text[pos] == b'(':
  136. start = pos
  137. pos += 1
  138. depth = 1
  139. while depth:
  140. match = cls._instring_re.search(text[pos:])
  141. if match is None:
  142. return
  143. pos += match.end()
  144. if match.group() == b'(':
  145. depth += 1
  146. elif match.group() == b')':
  147. depth -= 1
  148. else: # a backslash - skip the next character
  149. pos += 1
  150. yield (cls._string, text[start:pos])
  151. elif text[pos:pos + 2] in (b'<<', b'>>'):
  152. yield (cls._delimiter, text[pos:pos + 2])
  153. pos += 2
  154. elif text[pos] == b'<':
  155. start = pos
  156. pos += text[pos:].index(b'>')
  157. yield (cls._string, text[start:pos])
  158. else:
  159. match = cls._token_re.match(text[pos:])
  160. if match:
  161. try:
  162. float(match.group())
  163. yield (cls._number, match.group())
  164. except ValueError:
  165. yield (cls._name, match.group())
  166. pos += match.end()
  167. else:
  168. yield (cls._delimiter, text[pos:pos + 1])
  169. pos += 1
  170. def _parse(self):
  171. """
  172. Find the values of various font properties. This limited kind
  173. of parsing is described in Chapter 10 "Adobe Type Manager
  174. Compatibility" of the Type-1 spec.
  175. """
  176. # Start with reasonable defaults
  177. prop = {'weight': 'Regular', 'ItalicAngle': 0.0, 'isFixedPitch': False,
  178. 'UnderlinePosition': -100, 'UnderlineThickness': 50}
  179. filtered = ((token, value)
  180. for token, value in self._tokens(self.parts[0])
  181. if token is not self._whitespace)
  182. # The spec calls this an ASCII format; in Python 2.x we could
  183. # just treat the strings and names as opaque bytes but let's
  184. # turn them into proper Unicode, and be lenient in case of high bytes.
  185. convert = lambda x: x.decode('ascii', 'replace')
  186. for token, value in filtered:
  187. if token is self._name and value.startswith(b'/'):
  188. key = convert(value[1:])
  189. token, value = next(filtered)
  190. if token is self._name:
  191. if value in (b'true', b'false'):
  192. value = value == b'true'
  193. else:
  194. value = convert(value.lstrip(b'/'))
  195. elif token is self._string:
  196. value = convert(value.lstrip(b'(').rstrip(b')'))
  197. elif token is self._number:
  198. if b'.' in value:
  199. value = float(value)
  200. else:
  201. value = int(value)
  202. else: # more complicated value such as an array
  203. value = None
  204. if key != 'FontInfo' and value is not None:
  205. prop[key] = value
  206. # Fill in the various *Name properties
  207. if 'FontName' not in prop:
  208. prop['FontName'] = (prop.get('FullName') or
  209. prop.get('FamilyName') or
  210. 'Unknown')
  211. if 'FullName' not in prop:
  212. prop['FullName'] = prop['FontName']
  213. if 'FamilyName' not in prop:
  214. extras = r'(?i)([ -](regular|plain|italic|oblique|(semi)?bold|(ultra)?light|extra|condensed))+$'
  215. prop['FamilyName'] = re.sub(extras, '', prop['FullName'])
  216. self.prop = prop
  217. @classmethod
  218. def _transformer(cls, tokens, slant, extend):
  219. def fontname(name):
  220. result = name
  221. if slant:
  222. result += b'_Slant_' + str(int(1000 * slant)).encode('latin-1')
  223. if extend != 1.0:
  224. result += b'_Extend_' + str(int(1000 * extend)).encode('latin-1')
  225. return result
  226. def italicangle(angle):
  227. return str(float(angle) - np.arctan(slant) / np.pi * 180).encode('latin-1')
  228. def fontmatrix(array):
  229. array = array.lstrip(b'[').rstrip(b']').strip().split()
  230. array = [float(x) for x in array]
  231. oldmatrix = np.eye(3, 3)
  232. oldmatrix[0:3, 0] = array[::2]
  233. oldmatrix[0:3, 1] = array[1::2]
  234. modifier = np.array([[extend, 0, 0],
  235. [slant, 1, 0],
  236. [0, 0, 1]])
  237. newmatrix = np.dot(modifier, oldmatrix)
  238. array[::2] = newmatrix[0:3, 0]
  239. array[1::2] = newmatrix[0:3, 1]
  240. as_string = u'[' + u' '.join(str(x) for x in array) + u']'
  241. return as_string.encode('latin-1')
  242. def replace(fun):
  243. def replacer(tokens):
  244. token, value = next(tokens) # name, e.g., /FontMatrix
  245. yield bytes(value)
  246. token, value = next(tokens) # possible whitespace
  247. while token is cls._whitespace:
  248. yield bytes(value)
  249. token, value = next(tokens)
  250. if value != b'[': # name/number/etc.
  251. yield bytes(fun(value))
  252. else: # array, e.g., [1 2 3]
  253. result = b''
  254. while value != b']':
  255. result += value
  256. token, value = next(tokens)
  257. result += value
  258. yield fun(result)
  259. return replacer
  260. def suppress(tokens):
  261. for x in itertools.takewhile(lambda x: x[1] != b'def', tokens):
  262. pass
  263. yield b''
  264. table = {b'/FontName': replace(fontname),
  265. b'/ItalicAngle': replace(italicangle),
  266. b'/FontMatrix': replace(fontmatrix),
  267. b'/UniqueID': suppress}
  268. while True:
  269. token, value = next(tokens)
  270. if token is cls._name and value in table:
  271. for value in table[value](itertools.chain([(token, value)],
  272. tokens)):
  273. yield value
  274. else:
  275. yield value
  276. def transform(self, effects):
  277. """
  278. Transform the font by slanting or extending. *effects* should
  279. be a dict where ``effects['slant']`` is the tangent of the
  280. angle that the font is to be slanted to the right (so negative
  281. values slant to the left) and ``effects['extend']`` is the
  282. multiplier by which the font is to be extended (so values less
  283. than 1.0 condense). Returns a new :class:`Type1Font` object.
  284. """
  285. with io.BytesIO() as buffer:
  286. tokenizer = self._tokens(self.parts[0])
  287. transformed = self._transformer(tokenizer,
  288. slant=effects.get('slant', 0.0),
  289. extend=effects.get('extend', 1.0))
  290. list(map(buffer.write, transformed))
  291. return Type1Font((buffer.getvalue(), self.parts[1], self.parts[2]))