PageRenderTime 56ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/pypy/module/pyexpat/interp_pyexpat.py

https://bitbucket.org/pypy/pypy/
Python | 869 lines | 792 code | 56 blank | 21 comment | 58 complexity | 1e30e9f2341303cc3e7109fd304e4ad8 MD5 | raw file
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0
  1. from pypy.interpreter.baseobjspace import W_Root
  2. from pypy.interpreter.typedef import TypeDef, GetSetProperty
  3. from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault
  4. from pypy.interpreter.error import OperationError, oefmt
  5. from rpython.rlib import rgc, jit
  6. from rpython.rtyper.lltypesystem import rffi, lltype
  7. from rpython.rtyper.tool import rffi_platform
  8. from rpython.translator.tool.cbuild import ExternalCompilationInfo
  9. from rpython.translator.platform import platform
  10. import sys
  11. import weakref
  12. import py
  13. if sys.platform == "win32":
  14. libname = 'libexpat'
  15. pre_include_bits = ["#define XML_STATIC"]
  16. else:
  17. libname = 'expat'
  18. pre_include_bits = []
  19. eci = ExternalCompilationInfo(
  20. libraries=[libname],
  21. library_dirs=platform.preprocess_library_dirs([]),
  22. includes=['expat.h'],
  23. include_dirs=platform.preprocess_include_dirs([]),
  24. pre_include_bits = pre_include_bits,
  25. )
  26. eci = rffi_platform.configure_external_library(
  27. libname, eci,
  28. [dict(prefix='expat-',
  29. include_dir='lib', library_dir='win32/bin/release'),
  30. ])
  31. XML_Content_Ptr = lltype.Ptr(lltype.ForwardReference())
  32. XML_Parser = rffi.COpaquePtr(typedef='XML_Parser')
  33. xml_error_list = [
  34. "XML_ERROR_NO_MEMORY",
  35. "XML_ERROR_SYNTAX",
  36. "XML_ERROR_NO_ELEMENTS",
  37. "XML_ERROR_INVALID_TOKEN",
  38. "XML_ERROR_UNCLOSED_TOKEN",
  39. "XML_ERROR_PARTIAL_CHAR",
  40. "XML_ERROR_TAG_MISMATCH",
  41. "XML_ERROR_DUPLICATE_ATTRIBUTE",
  42. "XML_ERROR_JUNK_AFTER_DOC_ELEMENT",
  43. "XML_ERROR_PARAM_ENTITY_REF",
  44. "XML_ERROR_UNDEFINED_ENTITY",
  45. "XML_ERROR_RECURSIVE_ENTITY_REF",
  46. "XML_ERROR_ASYNC_ENTITY",
  47. "XML_ERROR_BAD_CHAR_REF",
  48. "XML_ERROR_BINARY_ENTITY_REF",
  49. "XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF",
  50. "XML_ERROR_MISPLACED_XML_PI",
  51. "XML_ERROR_UNKNOWN_ENCODING",
  52. "XML_ERROR_INCORRECT_ENCODING",
  53. "XML_ERROR_UNCLOSED_CDATA_SECTION",
  54. "XML_ERROR_EXTERNAL_ENTITY_HANDLING",
  55. "XML_ERROR_NOT_STANDALONE",
  56. "XML_ERROR_UNEXPECTED_STATE",
  57. "XML_ERROR_ENTITY_DECLARED_IN_PE",
  58. "XML_ERROR_FEATURE_REQUIRES_XML_DTD",
  59. "XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING",
  60. # Added in Expat 1.95.7.
  61. "XML_ERROR_UNBOUND_PREFIX",
  62. # Added in Expat 1.95.8.
  63. "XML_ERROR_UNDECLARING_PREFIX",
  64. "XML_ERROR_INCOMPLETE_PE",
  65. "XML_ERROR_XML_DECL",
  66. "XML_ERROR_TEXT_DECL",
  67. "XML_ERROR_PUBLICID",
  68. "XML_ERROR_SUSPENDED",
  69. "XML_ERROR_NOT_SUSPENDED",
  70. "XML_ERROR_ABORTED",
  71. "XML_ERROR_FINISHED",
  72. "XML_ERROR_SUSPEND_PE",
  73. ]
  74. xml_model_list = [
  75. "XML_CTYPE_EMPTY",
  76. "XML_CTYPE_ANY",
  77. "XML_CTYPE_MIXED",
  78. "XML_CTYPE_NAME",
  79. "XML_CTYPE_CHOICE",
  80. "XML_CTYPE_SEQ",
  81. "XML_CQUANT_NONE",
  82. "XML_CQUANT_OPT",
  83. "XML_CQUANT_REP",
  84. "XML_CQUANT_PLUS",
  85. ]
  86. class CConfigure:
  87. _compilation_info_ = eci
  88. XML_Content = rffi_platform.Struct('XML_Content', [
  89. ('numchildren', rffi.UINT),
  90. ('children', XML_Content_Ptr),
  91. ('name', rffi.CCHARP),
  92. ('type', rffi.INT),
  93. ('quant', rffi.INT),
  94. ])
  95. XML_Encoding = rffi_platform.Struct('XML_Encoding', [
  96. ('map', rffi.CFixedArray(rffi.INT, 1)),
  97. ('data', rffi.VOIDP),
  98. ('convert', rffi.VOIDP),
  99. ('release', rffi.VOIDP),
  100. ])
  101. for name in ['XML_PARAM_ENTITY_PARSING_NEVER',
  102. 'XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE',
  103. 'XML_PARAM_ENTITY_PARSING_ALWAYS']:
  104. locals()[name] = rffi_platform.ConstantInteger(name)
  105. XML_MAJOR_VERSION = rffi_platform.ConstantInteger('XML_MAJOR_VERSION')
  106. XML_MINOR_VERSION = rffi_platform.ConstantInteger('XML_MINOR_VERSION')
  107. XML_MICRO_VERSION = rffi_platform.ConstantInteger('XML_MICRO_VERSION')
  108. XML_FALSE = rffi_platform.ConstantInteger('XML_FALSE')
  109. XML_TRUE = rffi_platform.ConstantInteger('XML_TRUE')
  110. for name in xml_error_list:
  111. locals()[name] = rffi_platform.ConstantInteger(name)
  112. for name in xml_model_list:
  113. locals()[name] = rffi_platform.ConstantInteger(name)
  114. for name in xml_model_list:
  115. locals()[name] = rffi_platform.ConstantInteger(name)
  116. for name in xml_model_list:
  117. locals()[name] = rffi_platform.ConstantInteger(name)
  118. for name in xml_model_list:
  119. locals()[name] = rffi_platform.ConstantInteger(name)
  120. for name in xml_model_list:
  121. locals()[name] = rffi_platform.ConstantInteger(name)
  122. for name in xml_model_list:
  123. locals()[name] = rffi_platform.ConstantInteger(name)
  124. for name in xml_model_list:
  125. locals()[name] = rffi_platform.ConstantInteger(name)
  126. XML_Parser_SIZE = rffi_platform.SizeOf("XML_Parser")
  127. for k, v in rffi_platform.configure(CConfigure).items():
  128. globals()[k] = v
  129. XML_COMBINED_VERSION = 10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION
  130. XML_Content_Ptr.TO.become(rffi.CArray(XML_Content))
  131. XML_Encoding_Ptr = lltype.Ptr(XML_Encoding)
  132. def expat_external(*a, **kw):
  133. kw['compilation_info'] = eci
  134. return rffi.llexternal(*a, **kw)
  135. INTERNED_CCHARP = "INTERNED"
  136. HANDLERS = dict(
  137. StartElementHandler = [INTERNED_CCHARP, rffi.CCHARPP],
  138. EndElementHandler = [INTERNED_CCHARP],
  139. ProcessingInstructionHandler = [INTERNED_CCHARP, INTERNED_CCHARP],
  140. CharacterDataHandler = [rffi.CCHARP, rffi.INT],
  141. UnparsedEntityDeclHandler = [INTERNED_CCHARP] * 5,
  142. NotationDeclHandler = [INTERNED_CCHARP] * 4,
  143. StartNamespaceDeclHandler = [INTERNED_CCHARP, INTERNED_CCHARP],
  144. EndNamespaceDeclHandler = [INTERNED_CCHARP],
  145. CommentHandler = [rffi.CCHARP],
  146. StartCdataSectionHandler = [],
  147. EndCdataSectionHandler = [],
  148. DefaultHandler = [rffi.CCHARP, rffi.INT],
  149. DefaultHandlerExpand = [rffi.CCHARP, rffi.INT],
  150. NotStandaloneHandler = [],
  151. ExternalEntityRefHandler = [rffi.CCHARP] + [INTERNED_CCHARP] * 3,
  152. StartDoctypeDeclHandler = [INTERNED_CCHARP, INTERNED_CCHARP,
  153. INTERNED_CCHARP, rffi.INT],
  154. EndDoctypeDeclHandler = [],
  155. EntityDeclHandler = [INTERNED_CCHARP, rffi.INT, rffi.CCHARP, rffi.INT,
  156. INTERNED_CCHARP, INTERNED_CCHARP, INTERNED_CCHARP,
  157. INTERNED_CCHARP],
  158. XmlDeclHandler = [rffi.CCHARP, rffi.CCHARP, rffi.INT],
  159. ElementDeclHandler = [INTERNED_CCHARP, lltype.Ptr(XML_Content)],
  160. AttlistDeclHandler = [INTERNED_CCHARP, INTERNED_CCHARP,
  161. rffi.CCHARP, rffi.CCHARP, rffi.INT],
  162. )
  163. if XML_COMBINED_VERSION >= 19504:
  164. HANDLERS['SkippedEntityHandler'] = [INTERNED_CCHARP, rffi.INT]
  165. NB_HANDLERS = len(HANDLERS)
  166. class Storage:
  167. "Store objects under a non moving ID"
  168. def __init__(self):
  169. self.clear()
  170. def clear(self):
  171. self.next_id = 0
  172. self._last_object_id = -1
  173. self._last_object = None
  174. self.storage = {}
  175. @staticmethod
  176. def get_nonmoving_id(obj, id=-1):
  177. if id < 0:
  178. id = global_storage.next_id
  179. global_storage.next_id += 1
  180. global_storage.storage[id] = obj
  181. return id
  182. @staticmethod
  183. def get_object(id):
  184. if id == global_storage._last_object_id:
  185. return global_storage._last_object
  186. result = global_storage.storage[id]
  187. global_storage._last_object_id = id
  188. global_storage._last_object = result
  189. return result
  190. @staticmethod
  191. def free_nonmoving_id(id):
  192. if id == global_storage._last_object_id:
  193. global_storage._last_object = None
  194. global_storage._last_object_id = -1
  195. del global_storage.storage[id]
  196. global_storage = Storage()
  197. class CallbackData(W_Root):
  198. def __init__(self, space, parser):
  199. self.space = space
  200. self.parser = weakref.ref(parser)
  201. SETTERS = {}
  202. for index, (name, params) in enumerate(HANDLERS.items()):
  203. arg_names = ['arg%d' % (i,) for i in range(len(params))]
  204. warg_names = ['w_arg%d' % (i,) for i in range(len(params))]
  205. converters = []
  206. real_params = []
  207. for i, ARG in enumerate(params):
  208. # Some custom argument conversions
  209. if name == "StartElementHandler" and i == 1:
  210. converters.append(
  211. 'w_arg%d = parser.w_convert_attributes(space, arg%d)' % (i, i))
  212. elif name in ["CharacterDataHandler", "DefaultHandlerExpand", "DefaultHandler"] and i == 0:
  213. converters.append(
  214. 'w_arg%d = parser.w_convert_charp_n(space, arg%d, arg%d)' % (i, i, i+1))
  215. del warg_names[i+1]
  216. elif name in ["EntityDeclHandler"] and i == 2:
  217. converters.append(
  218. 'w_arg%d = parser.w_convert_charp_n(space, arg%d, arg%d)' % (i, i, i+1))
  219. del warg_names[i+1]
  220. # the standard conversions
  221. elif ARG == rffi.CCHARP:
  222. converters.append(
  223. 'w_arg%d = parser.w_convert_charp(space, arg%d)' % (i, i))
  224. elif ARG == INTERNED_CCHARP:
  225. converters.append(
  226. 'w_arg%d = parser.w_convert_interned(space, arg%d)' % (i, i))
  227. ARG = rffi.CCHARP
  228. elif ARG == lltype.Ptr(XML_Content):
  229. converters.append(
  230. 'w_arg%d = parser.w_convert_model(space, arg%d)' % (i, i))
  231. converters.append(
  232. 'XML_FreeContentModel(parser.itself, arg%d)' % (i,))
  233. else:
  234. converters.append(
  235. 'w_arg%d = space.wrap(arg%d)' % (i, i))
  236. real_params.append(ARG)
  237. converters = '; '.join(converters)
  238. args = ', '.join(arg_names)
  239. wargs = ', '.join(warg_names)
  240. if name in ['ExternalEntityRefHandler',
  241. 'NotStandaloneHandler']:
  242. result_type = rffi.INT
  243. result_converter = "rffi.cast(rffi.INT, space.int_w(w_result))"
  244. result_error = "rffi.cast(rffi.INT, 0)"
  245. else:
  246. result_type = lltype.Void
  247. result_converter = "None"
  248. result_error = "None"
  249. if name == 'CharacterDataHandler':
  250. pre_code = 'if parser.buffer_string(space, w_arg0, arg1): return'
  251. else:
  252. pre_code = 'parser.flush_character_buffer(space)'
  253. if name == 'ExternalEntityRefHandler':
  254. first_arg = 'll_parser'
  255. first_lltype = XML_Parser
  256. ll_id = 'XML_GetUserData(ll_parser)'
  257. post_code = 'if space.is_w(w_result, space.w_None): return 0'
  258. else:
  259. first_arg = 'll_userdata'
  260. first_lltype = rffi.VOIDP
  261. ll_id = 'll_userdata'
  262. post_code = ''
  263. src = py.code.Source("""
  264. @jit.jit_callback('XML:%(name)s')
  265. def %(name)s_callback(%(first_arg)s, %(args)s):
  266. id = rffi.cast(lltype.Signed, %(ll_id)s)
  267. userdata = global_storage.get_object(id)
  268. space = userdata.space
  269. parser = userdata.parser()
  270. handler = parser.handlers[%(index)s]
  271. if not handler:
  272. return %(result_error)s
  273. try:
  274. %(converters)s
  275. %(pre_code)s
  276. w_result = space.call_function(handler, %(wargs)s)
  277. %(post_code)s
  278. except OperationError, e:
  279. if not parser._exc_info: # don't override an existing exception
  280. parser._exc_info = e
  281. XML_StopParser(parser.itself, XML_FALSE)
  282. return %(result_error)s
  283. return %(result_converter)s
  284. callback = %(name)s_callback
  285. """ % locals())
  286. exec src.compile()
  287. c_name = 'XML_Set' + name
  288. callback_type = lltype.Ptr(lltype.FuncType(
  289. [first_lltype] + real_params, result_type))
  290. func = expat_external(c_name,
  291. [XML_Parser, callback_type], lltype.Void)
  292. SETTERS[name] = (index, func, callback)
  293. # special case for UnknownEncodingHandlerData:
  294. # XML_SetUnknownEncodingHandler() needs an additional argument,
  295. # and it's not modifiable via user code anyway
  296. def UnknownEncodingHandlerData_callback(ll_userdata, name, info):
  297. id = rffi.cast(lltype.Signed, ll_userdata)
  298. userdata = global_storage.get_object(id)
  299. space = userdata.space
  300. parser = userdata.parser()
  301. name = rffi.charp2str(name)
  302. try:
  303. parser.UnknownEncodingHandler(space, name, info)
  304. except OperationError as e:
  305. if not parser._exc_info:
  306. parser._exc_info = e
  307. XML_StopParser(parser.itself, XML_FALSE)
  308. result = 0
  309. else:
  310. result = 1
  311. return rffi.cast(rffi.INT, result)
  312. callback_type = lltype.Ptr(lltype.FuncType(
  313. [rffi.VOIDP, rffi.CCHARP, XML_Encoding_Ptr], rffi.INT))
  314. XML_SetUnknownEncodingHandler = expat_external(
  315. 'XML_SetUnknownEncodingHandler',
  316. [XML_Parser, callback_type, rffi.VOIDP], lltype.Void)
  317. # Declarations of external functions
  318. XML_ParserCreate = expat_external(
  319. 'XML_ParserCreate', [rffi.CCHARP], XML_Parser)
  320. XML_ParserCreateNS = expat_external(
  321. 'XML_ParserCreateNS', [rffi.CCHARP, rffi.CHAR], XML_Parser)
  322. XML_ParserFree = expat_external(
  323. 'XML_ParserFree', [XML_Parser], lltype.Void, releasegil=False)
  324. XML_SetUserData = expat_external(
  325. 'XML_SetUserData', [XML_Parser, rffi.VOIDP], lltype.Void)
  326. def XML_GetUserData(parser):
  327. # XXX is this always true?
  328. return rffi.cast(rffi.VOIDPP, parser)[0]
  329. XML_Parse = expat_external(
  330. 'XML_Parse', [XML_Parser, rffi.CCHARP, rffi.INT, rffi.INT], rffi.INT)
  331. XML_StopParser = expat_external(
  332. 'XML_StopParser', [XML_Parser, rffi.INT], lltype.Void)
  333. XML_SetReturnNSTriplet = expat_external(
  334. 'XML_SetReturnNSTriplet', [XML_Parser, rffi.INT], lltype.Void)
  335. XML_GetSpecifiedAttributeCount = expat_external(
  336. 'XML_GetSpecifiedAttributeCount', [XML_Parser], rffi.INT)
  337. XML_SetParamEntityParsing = expat_external(
  338. 'XML_SetParamEntityParsing', [XML_Parser, rffi.INT], lltype.Void)
  339. XML_SetBase = expat_external(
  340. 'XML_SetBase', [XML_Parser, rffi.CCHARP], lltype.Void)
  341. if XML_COMBINED_VERSION >= 19505:
  342. XML_UseForeignDTD = expat_external(
  343. 'XML_UseForeignDTD', [XML_Parser, rffi.INT], lltype.Void)
  344. XML_GetErrorCode = expat_external(
  345. 'XML_GetErrorCode', [XML_Parser], rffi.INT)
  346. XML_ErrorString = expat_external(
  347. 'XML_ErrorString', [rffi.INT],
  348. rffi.CCHARP)
  349. XML_GetCurrentLineNumber = expat_external(
  350. 'XML_GetCurrentLineNumber', [XML_Parser], rffi.INT)
  351. XML_GetErrorLineNumber = XML_GetCurrentLineNumber
  352. XML_GetCurrentColumnNumber = expat_external(
  353. 'XML_GetCurrentColumnNumber', [XML_Parser], rffi.INT)
  354. XML_GetErrorColumnNumber = XML_GetCurrentColumnNumber
  355. XML_GetCurrentByteIndex = expat_external(
  356. 'XML_GetCurrentByteIndex', [XML_Parser], rffi.INT)
  357. XML_GetErrorByteIndex = XML_GetCurrentByteIndex
  358. XML_FreeContentModel = expat_external(
  359. 'XML_FreeContentModel', [XML_Parser, lltype.Ptr(XML_Content)], lltype.Void)
  360. XML_ExternalEntityParserCreate = expat_external(
  361. 'XML_ExternalEntityParserCreate', [XML_Parser, rffi.CCHARP, rffi.CCHARP],
  362. XML_Parser)
  363. XML_ExpatVersion = expat_external(
  364. 'XML_ExpatVersion', [], rffi.CCHARP)
  365. def get_expat_version(space):
  366. return space.wrap(rffi.charp2str(XML_ExpatVersion()))
  367. def get_expat_version_info(space):
  368. return space.newtuple([
  369. space.wrap(XML_MAJOR_VERSION),
  370. space.wrap(XML_MINOR_VERSION),
  371. space.wrap(XML_MICRO_VERSION)])
  372. class Cache:
  373. def __init__(self, space):
  374. self.w_error = space.new_exception_class("pyexpat.ExpatError")
  375. class W_XMLParserType(W_Root):
  376. id = -1
  377. def __init__(self, space, parser, w_intern):
  378. self.itself = parser
  379. self.register_finalizer(space)
  380. self.w_intern = w_intern
  381. self.returns_unicode = True
  382. self.ordered_attributes = False
  383. self.specified_attributes = False
  384. self.handlers = [None] * NB_HANDLERS
  385. self.buffer = None
  386. self.buffer_size = 8192
  387. self.buffer_used = 0
  388. self.w_character_data_handler = None
  389. self._exc_info = None
  390. # Set user data for callback function
  391. self.id = global_storage.get_nonmoving_id(
  392. CallbackData(space, self))
  393. XML_SetUserData(self.itself, rffi.cast(rffi.VOIDP, self.id))
  394. def _finalize_(self):
  395. if XML_ParserFree: # careful with CPython interpreter shutdown
  396. if self.itself:
  397. XML_ParserFree(self.itself)
  398. self.itself = lltype.nullptr(XML_Parser.TO)
  399. if global_storage and self.id >= 0:
  400. try:
  401. global_storage.free_nonmoving_id(self.id)
  402. except KeyError:
  403. pass # maybe global_storage.clear() was already called
  404. self.id = -1
  405. @unwrap_spec(flag=int)
  406. def SetParamEntityParsing(self, space, flag):
  407. """SetParamEntityParsing(flag) -> success
  408. Controls parsing of parameter entities (including the external DTD
  409. subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,
  410. XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and
  411. XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag
  412. was successful."""
  413. XML_SetParamEntityParsing(self.itself, flag)
  414. @unwrap_spec(w_flag=WrappedDefault(True))
  415. def UseForeignDTD(self, space, w_flag):
  416. """UseForeignDTD([flag])
  417. Allows the application to provide an artificial external subset if one is
  418. not specified as part of the document instance. This readily allows the
  419. use of a 'default' document type controlled by the application, while still
  420. getting the advantage of providing document type information to the parser.
  421. 'flag' defaults to True if not provided."""
  422. flag = space.is_true(w_flag)
  423. XML_UseForeignDTD(self.itself, flag)
  424. # Handlers management
  425. def w_convert(self, space, s):
  426. if self.returns_unicode:
  427. from pypy.interpreter.unicodehelper import decode_utf8
  428. return space.wrap(decode_utf8(space, s))
  429. else:
  430. return space.wrap(s)
  431. def w_convert_charp(self, space, data):
  432. if data:
  433. return self.w_convert(space, rffi.charp2str(data))
  434. else:
  435. return space.w_None
  436. def w_convert_interned(self, space, data):
  437. if not data:
  438. return space.w_None
  439. w_data = self.w_convert_charp(space, data)
  440. if not self.w_intern:
  441. return w_data
  442. try:
  443. return space.getitem(self.w_intern, w_data)
  444. except OperationError as e:
  445. if not e.match(space, space.w_KeyError):
  446. raise
  447. space.setitem(self.w_intern, w_data, w_data)
  448. return w_data
  449. def w_convert_charp_n(self, space, data, length):
  450. ll_length = rffi.cast(lltype.Signed, length)
  451. if data:
  452. return self.w_convert(space, rffi.charp2strn(data, ll_length))
  453. else:
  454. return space.w_None
  455. def w_convert_attributes(self, space, attrs):
  456. if self.specified_attributes:
  457. maxindex = XML_GetSpecifiedAttributeCount(self.itself)
  458. else:
  459. maxindex = 0
  460. while attrs[maxindex]:
  461. maxindex += 2 # copied
  462. if self.ordered_attributes:
  463. w_attrs = space.newlist([
  464. self.w_convert_charp(space, attrs[i])
  465. for i in range(maxindex)])
  466. else:
  467. w_attrs = space.newdict()
  468. for i in range(0, maxindex, 2):
  469. space.setitem(
  470. w_attrs,
  471. self.w_convert_charp(space, attrs[i]),
  472. self.w_convert_charp(space, attrs[i + 1]))
  473. return w_attrs
  474. def w_convert_model(self, space, model):
  475. children = [self.w_convert_model(space, model.c_children[i])
  476. for i in range(model.c_numchildren)]
  477. return space.newtuple([
  478. space.wrap(model.c_type),
  479. space.wrap(model.c_quant),
  480. self.w_convert_charp(space, model.c_name),
  481. space.newtuple(children)])
  482. def buffer_string(self, space, w_string, length):
  483. ll_length = rffi.cast(lltype.Signed, length)
  484. if self.buffer is not None:
  485. if self.buffer_used + ll_length > self.buffer_size:
  486. self.flush_character_buffer(space)
  487. # handler might have changed; drop the rest on the floor
  488. # if there isn't a handler anymore
  489. if self.w_character_data_handler is None:
  490. return True
  491. if ll_length <= self.buffer_size:
  492. self.buffer.append(w_string)
  493. self.buffer_used += ll_length
  494. return True
  495. else:
  496. self.buffer = []
  497. self.buffer_used = 0
  498. return False
  499. def gethandler(self, space, name, index):
  500. if name == 'CharacterDataHandler':
  501. return self.w_character_data_handler or space.w_None
  502. return self.handlers[index]
  503. def sethandler(self, space, name, w_handler, index, setter, handler):
  504. if name == 'CharacterDataHandler':
  505. self.flush_character_buffer(space)
  506. if space.is_w(w_handler, space.w_None):
  507. self.w_character_data_handler = None
  508. else:
  509. self.w_character_data_handler = w_handler
  510. #
  511. self.handlers[index] = w_handler
  512. setter(self.itself, handler)
  513. sethandler._annspecialcase_ = 'specialize:arg(2)'
  514. all_chars = ''.join(chr(i) for i in range(256))
  515. def UnknownEncodingHandler(self, space, name, info):
  516. # Yes, supports only 8bit encodings
  517. translationmap = space.unicode_w(
  518. space.call_method(
  519. space.wrap(self.all_chars), "decode",
  520. space.wrap(name), space.wrap("replace")))
  521. if len(translationmap) != 256:
  522. raise oefmt(space.w_ValueError,
  523. "multi-byte encodings are not supported")
  524. for i in range(256):
  525. c = translationmap[i]
  526. if c == u'\ufffd':
  527. info.c_map[i] = rffi.cast(rffi.INT, -1)
  528. else:
  529. info.c_map[i] = rffi.cast(rffi.INT, c)
  530. info.c_data = lltype.nullptr(rffi.VOIDP.TO)
  531. info.c_convert = lltype.nullptr(rffi.VOIDP.TO)
  532. info.c_release = lltype.nullptr(rffi.VOIDP.TO)
  533. return True
  534. @staticmethod
  535. def _make_property(name):
  536. index, setter, handler = SETTERS[name]
  537. #
  538. def descr_get_property(self, space):
  539. return self.gethandler(space, name, index)
  540. #
  541. def descr_set_property(self, space, w_value):
  542. return self.sethandler(space, name, w_value,
  543. index, setter, handler)
  544. #
  545. return GetSetProperty(descr_get_property,
  546. descr_set_property,
  547. cls=W_XMLParserType)
  548. def get_namespace_prefixes(self, space):
  549. raise oefmt(space.w_AttributeError,
  550. "not implemented: reading namespace_prefixes")
  551. @unwrap_spec(value=int)
  552. def set_namespace_prefixes(self, space, value):
  553. XML_SetReturnNSTriplet(self.itself, bool(value))
  554. # Parse methods
  555. @unwrap_spec(data=str, isfinal=bool)
  556. def Parse(self, space, data, isfinal=False):
  557. """Parse(data[, isfinal])
  558. Parse XML data. `isfinal' should be true at end of input."""
  559. res = XML_Parse(self.itself, data, len(data), isfinal)
  560. if self._exc_info:
  561. e = self._exc_info
  562. self._exc_info = None
  563. raise e
  564. elif res == 0:
  565. exc = self.set_error(space, XML_GetErrorCode(self.itself))
  566. raise exc
  567. self.flush_character_buffer(space)
  568. return space.wrap(res)
  569. def ParseFile(self, space, w_file):
  570. """ParseFile(file)
  571. Parse XML data from file-like object."""
  572. eof = False
  573. while not eof:
  574. w_data = space.call_method(w_file, 'read', space.wrap(2048))
  575. data = space.str_w(w_data)
  576. eof = len(data) == 0
  577. w_res = self.Parse(space, data, isfinal=eof)
  578. return w_res
  579. @unwrap_spec(base=str)
  580. def SetBase(self, space, base):
  581. XML_SetBase(self.itself, base)
  582. def ExternalEntityParserCreate(self, space, w_context, w_encoding=None):
  583. """ExternalEntityParserCreate(context[, encoding])
  584. Create a parser for parsing an external entity based on the
  585. information passed to the ExternalEntityRefHandler."""
  586. if space.is_w(w_context, space.w_None):
  587. context = None
  588. else:
  589. context = space.str_w(w_context)
  590. if space.is_none(w_encoding):
  591. encoding = None
  592. else:
  593. encoding = space.str_w(w_encoding)
  594. xmlparser = XML_ExternalEntityParserCreate(
  595. self.itself, context, encoding)
  596. if not xmlparser:
  597. raise MemoryError
  598. parser = W_XMLParserType(space, xmlparser, self.w_intern)
  599. # copy handlers from self
  600. for i in range(NB_HANDLERS):
  601. parser.handlers[i] = self.handlers[i]
  602. return space.wrap(parser)
  603. def flush_character_buffer(self, space):
  604. if not self.buffer:
  605. return
  606. w_data = space.call_function(
  607. space.getattr(space.wrap(''), space.wrap('join')),
  608. space.newlist(self.buffer))
  609. self.buffer = []
  610. self.buffer_used = 0
  611. if self.w_character_data_handler:
  612. space.call_function(self.w_character_data_handler, w_data)
  613. # Error management
  614. def set_error(self, space, code):
  615. err = rffi.charp2strn(XML_ErrorString(code), 200)
  616. lineno = XML_GetCurrentLineNumber(self.itself)
  617. colno = XML_GetCurrentColumnNumber(self.itself)
  618. msg = "%s: line %d, column %d" % (err, lineno, colno)
  619. w_errorcls = space.fromcache(Cache).w_error
  620. w_error = space.call_function(w_errorcls, space.wrap(msg))
  621. space.setattr(w_error, space.wrap("code"), space.wrap(code))
  622. space.setattr(w_error, space.wrap("offset"), space.wrap(colno))
  623. space.setattr(w_error, space.wrap("lineno"), space.wrap(lineno))
  624. self.w_error = w_error
  625. return OperationError(w_errorcls, w_error)
  626. def descr_ErrorCode(self, space):
  627. return space.wrap(XML_GetErrorCode(self.itself))
  628. def descr_ErrorLineNumber(self, space):
  629. return space.wrap(XML_GetErrorLineNumber(self.itself))
  630. def descr_ErrorColumnNumber(self, space):
  631. return space.wrap(XML_GetErrorColumnNumber(self.itself))
  632. def descr_ErrorByteIndex(self, space):
  633. return space.wrap(XML_GetErrorByteIndex(self.itself))
  634. def get_buffer_size(self, space):
  635. return space.wrap(self.buffer_size)
  636. def set_buffer_size(self, space, w_value):
  637. value = space.getindex_w(w_value, space.w_TypeError)
  638. if value <= 0:
  639. raise oefmt(space.w_ValueError,
  640. "buffer_size must be greater than zero")
  641. self.flush_character_buffer(space)
  642. self.buffer_size = value
  643. def get_buffer_text(self, space):
  644. return space.wrap(self.buffer is not None)
  645. def set_buffer_text(self, space, w_value):
  646. if space.is_true(w_value):
  647. self.buffer = []
  648. self.buffer_used = 0
  649. else:
  650. self.flush_character_buffer(space)
  651. self.buffer = None
  652. def get_intern(self, space):
  653. if self.w_intern:
  654. return self.w_intern
  655. else:
  656. return space.w_None
  657. def bool_property(name, cls, doc=None):
  658. def fget(space, obj):
  659. return space.wrap(getattr(obj, name))
  660. def fset(space, obj, value):
  661. setattr(obj, name, space.bool_w(value))
  662. return GetSetProperty(fget, fset, cls=cls, doc=doc)
  663. XMLParser_methods = ['Parse', 'ParseFile', 'SetBase', 'SetParamEntityParsing',
  664. 'ExternalEntityParserCreate']
  665. if XML_COMBINED_VERSION >= 19505:
  666. XMLParser_methods.append('UseForeignDTD')
  667. _XMLParser_extras = {}
  668. for name in XMLParser_methods:
  669. _XMLParser_extras[name] = interp2app(getattr(W_XMLParserType, name))
  670. for name in SETTERS:
  671. _XMLParser_extras[name] = W_XMLParserType._make_property(name)
  672. W_XMLParserType.typedef = TypeDef(
  673. "pyexpat.XMLParserType",
  674. __doc__ = "XML parser",
  675. namespace_prefixes = GetSetProperty(W_XMLParserType.get_namespace_prefixes,
  676. W_XMLParserType.set_namespace_prefixes,
  677. cls=W_XMLParserType),
  678. returns_unicode = bool_property('returns_unicode', W_XMLParserType),
  679. ordered_attributes = bool_property('ordered_attributes', W_XMLParserType),
  680. specified_attributes = bool_property('specified_attributes', W_XMLParserType),
  681. intern = GetSetProperty(W_XMLParserType.get_intern, cls=W_XMLParserType),
  682. buffer_size = GetSetProperty(W_XMLParserType.get_buffer_size,
  683. W_XMLParserType.set_buffer_size,
  684. cls=W_XMLParserType),
  685. buffer_text = GetSetProperty(W_XMLParserType.get_buffer_text,
  686. W_XMLParserType.set_buffer_text, cls=W_XMLParserType),
  687. ErrorCode = GetSetProperty(W_XMLParserType.descr_ErrorCode, cls=W_XMLParserType),
  688. ErrorLineNumber = GetSetProperty(W_XMLParserType.descr_ErrorLineNumber, cls=W_XMLParserType),
  689. ErrorColumnNumber = GetSetProperty(W_XMLParserType.descr_ErrorColumnNumber, cls=W_XMLParserType),
  690. ErrorByteIndex = GetSetProperty(W_XMLParserType.descr_ErrorByteIndex, cls=W_XMLParserType),
  691. CurrentLineNumber = GetSetProperty(W_XMLParserType.descr_ErrorLineNumber, cls=W_XMLParserType),
  692. CurrentColumnNumber = GetSetProperty(W_XMLParserType.descr_ErrorColumnNumber, cls=W_XMLParserType),
  693. CurrentByteIndex = GetSetProperty(W_XMLParserType.descr_ErrorByteIndex, cls=W_XMLParserType),
  694. **_XMLParser_extras
  695. )
  696. def ParserCreate(space, w_encoding=None, w_namespace_separator=None,
  697. w_intern=None):
  698. """ParserCreate([encoding[, namespace_separator]]) -> parser
  699. Return a new XML parser object."""
  700. if space.is_none(w_encoding):
  701. encoding = None
  702. elif space.isinstance_w(w_encoding, space.w_str):
  703. encoding = space.str_w(w_encoding)
  704. else:
  705. raise oefmt(space.w_TypeError,
  706. "ParserCreate() argument 1 must be string or None, not %T",
  707. w_encoding)
  708. if space.is_none(w_namespace_separator):
  709. namespace_separator = 0
  710. elif space.isinstance_w(w_namespace_separator, space.w_str):
  711. separator = space.str_w(w_namespace_separator)
  712. if len(separator) == 0:
  713. namespace_separator = 0
  714. elif len(separator) == 1:
  715. namespace_separator = ord(separator[0])
  716. else:
  717. raise oefmt(space.w_ValueError,
  718. "namespace_separator must be at most one character, "
  719. "omitted, or None")
  720. else:
  721. raise oefmt(space.w_TypeError,
  722. "ParserCreate() argument 2 must be string or None, not %T",
  723. w_namespace_separator)
  724. # Explicitly passing None means no interning is desired.
  725. # Not passing anything means that a new dictionary is used.
  726. if w_intern is None:
  727. w_intern = space.newdict()
  728. elif space.is_w(w_intern, space.w_None):
  729. w_intern = None
  730. if namespace_separator:
  731. xmlparser = XML_ParserCreateNS(
  732. encoding,
  733. rffi.cast(rffi.CHAR, namespace_separator))
  734. else:
  735. xmlparser = XML_ParserCreate(encoding)
  736. # Currently this is just the size of the pointer and some estimated bytes.
  737. # The struct isn't actually defined in expat.h - it is in xmlparse.c
  738. # XXX: find a good estimate of the XML_ParserStruct
  739. rgc.add_memory_pressure(XML_Parser_SIZE + 300)
  740. if not xmlparser:
  741. raise oefmt(space.w_RuntimeError, "XML_ParserCreate failed")
  742. parser = W_XMLParserType(space, xmlparser, w_intern)
  743. XML_SetUnknownEncodingHandler(
  744. parser.itself, UnknownEncodingHandlerData_callback,
  745. rffi.cast(rffi.VOIDP, parser.id))
  746. return space.wrap(parser)
  747. @unwrap_spec(code=int)
  748. def ErrorString(space, code):
  749. """ErrorString(errno) -> string
  750. Returns string error for given number."""
  751. return space.wrap(rffi.charp2str(XML_ErrorString(code)))