/src/lib/xml/xml_parser.e

http://github.com/tybor/Liberty · Specman e · 731 lines · 113 code · 8 blank · 610 comment · 11 complexity · b51a3edbbc1f8aade5e640b58597fcd2 MD5 · raw file

  1. -- See the Copyright notice at the end of this file.
  2. --
  3. class XML_PARSER
  4. --
  5. -- The standard Liberty Eiffel XML parser. It is able to parse any well-formed XML document, and can also
  6. -- validate a document containing a DTD.
  7. --
  8. -- Note that this parser is not namespace-aware, nor does it validate documents against a Schema. Classes
  9. -- providing those features must be built on top of this parser.
  10. --
  11. -- See http://www.w3.org/TR/2006/REC-xml11-20060816/
  12. --
  13. -- See also XML_CALLBACKS which is called by this parser when a parsing event occurs.
  14. --
  15. insert
  16. XML_PARSER_TOOLS
  17. redefine
  18. next, end_of_input
  19. end
  20. URL_VALIDITY
  21. create {ANY}
  22. connect_to, make
  23. feature {ANY}
  parse (a_callbacks: like callbacks)
        -- Parse an XML document, sending the parsing events to `a_callbacks'.
        -- Once parsing has stopped, the validator held by the callbacks (if any) is told
        -- that the end was reached.
     require
        is_connected
     local
        status: like parse_node; checker: XML_VALIDATOR
     do
        callbacks := a_callbacks
        -- The status is not used further here: errors reach the client through the callbacks.
        status := parse_node(True)
        checker := validator
        if checker /= Void then
           checker.the_end
        end
     end
  connect_to (a_url: URL)
        -- (Re)initialise the parser and attach it to `a_url'.
        -- If `a_url' is not connected yet and can be, `read_only' is called on it and it is
        -- connected. When the URL ends up connected, a buffer reading from it becomes the
        -- current buffer; otherwise the parser stays unconnected.
     require
        not is_connected
        a_url.is_connected implies a_url.read
     do
        make
        if not a_url.is_connected then
           if a_url.can_connect then
              a_url.read_only
              a_url.connect
           end
        end
        if a_url.is_connected then
           check
              a_url.read
           end
           -- No face URL and no entity name: this is the document itself, not an entity.
           connect_buffer(a_url, Void, Void)
        end
     ensure
        a_url.is_connected implies (a_url = buffer.url and then is_connected)
     end
  56. disconnect
        -- Disconnect and pop buffers, starting from the top of `open_buffers', until
        -- `is_connected' no longer holds.
  57. require
  58. is_connected
  59. do
  60. from
  61. until
  62. not is_connected
  63. loop
        -- Each call disconnects the current buffer and pops it together with its URL.
  64. disconnect_buffer
  65. end
  66. ensure
  67. not is_connected
  68. end
  is_connected: BOOLEAN
        -- Is there at least one open buffer, and is the current (topmost) one connected?
     do
        if open_buffers /= Void and then not open_buffers.is_empty then
           Result := buffer.is_connected
        end
     end
  73. feature {}
  74. callbacks: XML_CALLBACKS
        -- Receiver of the parsing events; assigned by `parse'.
  75. url: URL
        -- NOTE(review): in the code visible here this attribute is only written (by `set_url')
        -- and never read -- confirm whether it is still needed.
  76. set_url (a_url: like url)
        -- Set `url' to `a_url'.
  77. do
  78. url := a_url
  79. end
  parse_node (at_root: BOOLEAN): INTEGER
        -- Parse the next node found in the current buffer and report it to `callbacks' (and to
        -- the `validator' when there is one).
        --
        -- Character data preceding the next tag is reported first. Comments, CDATA sections,
        -- the DOCTYPE declaration, the XML declaration and processing instructions are handled
        -- and parsing goes on with what follows them; an element (with its children) or a
        -- closing tag ends the call.
        --
        -- `at_root' is True for the call made by `parse' and False for the calls made by
        -- `parse_children'. The DOCTYPE declaration is only accepted, and the XML declaration
        -- only reported, when `at_root' is True.
        --
        -- Result:
        --   Parse_again  an element was read; siblings may follow
        --   Parse_done   a closing tag was read, or the end of the input was reached
        --   Parse_error  an error was detected (always the result when `callbacks.at_error')
     require
        is_connected
     local
        name, entity, entity_value, entity_url, data, data_blanks, pi_target, pi_data: UNICODE_STRING
        again, done, open, open_close, pi_closed: BOOLEAN; l, c: INTEGER
     do
        from
           again := True
        invariant
           is_connected
        until
           not again or else callbacks.at_error
        loop
           again := False
           skip_blanks
           l := line
           c := column
           -- 1. Collect the character data standing before the next '<'.
           --    Blanks met before any other character are kept aside in `data_blanks' and only
           --    added to `data' once a non-blank character shows up.
           from
              data := once U""
              data.clear_count
              data_blanks := once U""
              data_blanks.clear_count
           until
              end_of_input or else skip('<')
           loop
              if is_separator(current_character) then
                 if data.is_empty then
                    data_blanks.add_last(current_character)
                 else
                    data.add_last(current_character)
                 end
                 -- The separator must be consumed, otherwise this loop never advances.
                 next
              else
                 if not data_blanks.is_empty then
                    data.append_string(data_blanks)
                    data_blanks.clear_count
                 end
                 if current_character = '&'.code then
                    l := line
                    c := column
                    next
                    entity := read_identifier
                    -- `read_identifier' may return Void (e.g. "&;"): treat it like a malformed reference.
                    if entity /= Void and then current_character = ';'.code then
                       if entity.is_equal(once U"lt") then
                          data.add_last('<'.code)
                          next
                       elseif entity.is_equal(once U"gt") then
                          data.add_last('>'.code)
                          next
                       elseif entity.is_equal(once U"amp") then
                          data.add_last('&'.code)
                          next
                       elseif entity.is_equal(once U"apos") then
                          data.add_last('%''.code)
                          next
                       elseif entity.is_equal(once U"quot") then
                          data.add_last('"'.code)
                          next
                       else
                          -- Not a predefined entity: ask the validator first, then the callbacks.
                          if validator /= Void then
                             entity_value := validator.entity(entity, l, c)
                             if entity_value = Void then
                                entity_value := callbacks.entity(entity, l, c)
                                entity_url := Void
                             else
                                entity_url := validator.entity_url(entity, l, c)
                             end
                          else
                             entity_value := callbacks.entity(entity, l, c)
                             entity_url := Void
                          end
                          if entity_value = Void then
                             callbacks.parse_error(l, c, once "Unknown entity")
                             Result := Parse_error
                          else
                             -- Skip the ';' and read on from the entity value, pushed as a new buffer.
                             next
                             connect_buffer_entity_value(entity, entity_value, entity_url)
                          end
                       end
                    else
                       callbacks.parse_error(l, c, once "Missing ';'")
                       Result := Parse_error
                    end
                 else
                    data.add_last(current_character)
                    next
                 end
              end
           end
           if not data.is_empty then
              if validator /= Void and then not validator.is_valid_data(data, l, c) then
                 callbacks.parse_error(l, c, once "Invalid data")
                 Result := Parse_error
              else
                 if validator /= Void then
                    validator.data(data, l, c)
                 end
                 callbacks.data(data, l, c)
              end
           end
           -- 2. Handle what follows the '<' (which has just been consumed), unless the data
           --    above was already in error.
           if Result /= Parse_error then
              if end_of_input then
                 Result := Parse_done
              elseif skip('/') then
                 -- Closing tag
                 skip_blanks
                 l := line
                 c := column
                 name := read_name
                 if name = Void then
                    callbacks.parse_error(l, c, once "Closing tag name expected")
                    Result := Parse_error
                 else
                    skip_blanks
                    if not skip('>') then
                       callbacks.parse_error(l, c, once "Missing '>'")
                       Result := Parse_error
                    elseif callbacks.current_node.is_equal(name) then
                       if validator = Void then
                          callbacks.close_node(name, l, c)
                          Result := Parse_done
                       elseif not validator.is_valid_close_node(name, l, c) then
                          callbacks.parse_error(l, c, once "Invalid closing tag")
                          Result := Parse_error
                       else
                          validator.close_node(name, l, c)
                          callbacks.close_node(name, l, c)
                          Result := Parse_done
                       end
                    else
                       callbacks.parse_error(l, c, once "Unexpected closing tag")
                       Result := Parse_error
                    end
                 end
              elseif skip('!') then
                 if skip2('-', '-') then
                    -- Comment: skipped up to "-->". The end of the input is tested first because
                    -- `next' must not be called once the last buffer is exhausted.
                    from
                       done := False
                    until
                       done
                    loop
                       if end_of_input then
                          callbacks.parse_error(l, c, once "Unterminated comment")
                          Result := Parse_error
                          done := True
                       elseif skip2('-', '-') and then skip('>') then
                          again := True
                          done := True
                       else
                          next
                       end
                    end
                 elseif skip_word(once "[CDATA[") then
                    read_cdata
                    again := True
                 elseif at_root and then skip_word(once "DOCTYPE") then
                    read_dtd
                    again := True
                 else
                    callbacks.parse_error(l, c, once "Syntax error")
                    Result := Parse_error
                 end
              elseif skip('?') then
                 skip_blanks
                 if skip_word(once "xml") then
                    -- XML declaration: its pseudo-attributes are read by `parse_attribute', which
                    -- receives the current buffer so that "encoding" can be applied to it.
                    -- `done' is shared with other loops of this routine and is therefore reset;
                    -- the loop stops at the first error so that the error status is kept.
                    from
                       done := False
                    until
                       done
                    loop
                       skip_blanks
                       if skip('?') then
                          if not skip('>') then
                             callbacks.parse_error(l, c, once "Missing '>'")
                             Result := Parse_error
                          end
                          done := True
                       else
                          Result := parse_attribute(buffer)
                          done := Result /= Parse_again
                       end
                    end
                    if Result /= Parse_error then
                       if at_root then
                          callbacks.xml_header(l, c)
                       else
                          -- ignored!! (valid xml file included via an entity?)
                       end
                       again := True
                    end
                 else
                    -- Processing instruction: everything up to "?>" is its data.
                    pi_target := read_identifier
                    pi_data := once U""
                    pi_data.clear_count
                    from
                       done := False
                       pi_closed := False
                    until
                       done
                    loop
                       if skip('?') then
                          if skip('>') then
                             pi_closed := True
                             done := True
                          else
                             pi_data.extend('?'.code)
                          end
                       else
                          pi_data.extend(current_character)
                          next
                       end
                       if end_of_input then
                          done := True
                       end
                    end
                    if pi_closed then
                       callbacks.processing_instruction(pi_target, pi_data)
                       -- Like a comment, a processing instruction does not end the enclosing
                       -- node: go on with whatever follows it.
                       again := True
                    else
                       callbacks.parse_error(l, c, once "Unterminated processing instruction")
                       Result := Parse_error
                    end
                 end
              else
                 -- Opening tag or empty-element tag
                 skip_blanks
                 l := line
                 c := column
                 name := read_name
                 if name = Void then
                    callbacks.parse_error(l, c, once "Opening tag name expected")
                    Result := Parse_error
                 else
                    skip_blanks
                    -- Find out how the tag ends, reading its attributes on the way.
                    open := False
                    open_close := False
                    if skip('>') then
                       open := True
                    elseif skip2('/', '>') then
                       open_close := True
                    else
                       from
                          done := False
                       until
                          done
                       loop
                          Result := parse_attribute(Void)
                          if skip('>') then
                             open := True
                             done := True
                          elseif skip2('/', '>') then
                             open_close := True
                             done := True
                          else
                             -- Go on only while attributes keep being read successfully.
                             done := Result /= Parse_again
                          end
                       end
                    end
                    if Result = Parse_error then
                       -- The attribute error was already reported by `parse_attribute'.
                    elseif open then
                       if validator /= Void and then not validator.is_valid_open_node(name, l, c) then
                          callbacks.parse_error(l, c, once "Invalid opening tag")
                          Result := Parse_error
                       else
                          if validator /= Void then
                             validator.open_node(name, l, c)
                          end
                          callbacks.open_node(name, l, c)
                          -- `name' is a shared once string that the recursive calls overwrite;
                          -- it must not be used after this point.
                          Result := parse_children
                       end
                    elseif open_close then
                       if validator /= Void and then not validator.is_valid_open_close_node(name, l, c) then
                          callbacks.parse_error(l, c, once "Invalid empty tag")
                          Result := Parse_error
                       else
                          if validator /= Void then
                             validator.open_close_node(name, l, c)
                          end
                          callbacks.open_close_node(name, l, c)
                          Result := Parse_again
                       end
                    else
                       -- Neither an attribute nor the end of the tag could be read.
                       callbacks.parse_error(l, c, once "Missing '>'")
                       Result := Parse_error
                    end
                 end
              end
           end
        end
        if callbacks.at_error then
           Result := Parse_error
        end
     ensure
        (<<Parse_again, Parse_done, Parse_error>>).has(Result)
     end
  read_name: UNICODE_STRING
        -- Copy of the identifier read at the current position, or Void if `read_identifier'
        -- returns none. A non-Void result is always the same once string, overwritten by
        -- each successful call.
     local
        identifier: UNICODE_STRING
     do
        identifier := read_identifier
        if identifier = Void then
           -- no identifier here: Result stays Void
        else
           Result := once U""
           Result.copy(identifier)
        end
     end
  parse_attribute (a_buffer: UNICODE_PARSER_BUFFER): INTEGER
        -- Read one `name = "value"' attribute at the current position and report it to the
        -- validator (if any) and to the callbacks.
        -- If `a_buffer' is not Void and the attribute is "encoding", set that buffer's encoding
        -- to the attribute value.
        --
        -- Result:
        --   Parse_again  an attribute was read
        --   Parse_done   no attribute name was found at the current position
        --   Parse_error  the '=' or the value is missing (reported to the callbacks)
     local
        a, an_attribute, value: UNICODE_STRING; l, c: INTEGER; sa, sv: STRING
     do
        skip_blanks
        l := line
        c := column
        a := read_identifier
        if a = Void then
           Result := Parse_done
        else
           skip_blanks
           if not skip('=') then
              callbacks.parse_error(l, c, once "Missing '='")
              Result := Parse_error
           else
              skip_blanks
              -- Keep a private copy of the name before reading on.
              an_attribute := once U""
              an_attribute.copy(a)
              debug
                 sa := an_attribute.as_utf8
              end
              value := read_string
              if value = Void then
                 callbacks.parse_error(l, c, once "Value expected")
                 Result := Parse_error
              else
                 -- Only the caller-supplied buffer may have its encoding changed: element
                 -- attributes are parsed with a Void `a_buffer' and must never affect the stream.
                 if a_buffer /= Void then
                    sa := once ""
                    sa.clear_count
                    an_attribute.utf8_encode_in(sa)
                    inspect
                       sa
                    when "encoding" then
                       -- `sv' is a once string too: clear what a previous call left in it.
                       sv := once ""
                       sv.clear_count
                       value.utf8_encode_in(sv)
                       a_buffer.set_encoding(sv)
                    else
                    end
                 end
                 if validator /= Void then
                    validator.with_attribute(an_attribute, value, l, c)
                 end
                 callbacks.with_attribute(an_attribute, value, l, c)
                 skip_blanks
                 Result := Parse_again
              end
           end
        end
     end
  parse_children: INTEGER
        -- Parse the nodes nested in the element that was just opened: `parse_node' is called
        -- at least once, then again for as long as it answers Parse_again and the callbacks
        -- are not in error.
        -- Result is Parse_error if an error occurred, Parse_again otherwise.
     do
        from
           Result := parse_node(False)
        until
           not (Result = Parse_again and then not callbacks.at_error)
        loop
           Result := parse_node(False)
        end
        if callbacks.at_error or else Result = Parse_error then
           Result := Parse_error
        else
           Result := Parse_again
        end
     ensure
        (<<Parse_again, Parse_done, Parse_error>>).has(Result)
     end
  read_cdata
        -- Read the content of a CDATA section up to and including its closing "]]>" and report
        -- it as data to the validator (if any) and to the callbacks. The opening "<![CDATA["
        -- has already been consumed by the caller.
        -- A section that is still open at the end of the input is reported as a parse error.
     local
        l, c, s: INTEGER; b: UNICODE_STRING
     do
        l := line
        c := column
        -- `s' tracks how much of the closing "]]>" has been seen:
        --    0: nothing, 2: "]", 3: "]]", -1: "]]>" (section closed), -2: input exhausted
        from
           b := once U""
           b.clear_count
        until
           s < 0
        loop
           if end_of_input then
              callbacks.parse_error(l, c, once "Unterminated CDATA")
              s := -2
           else
              inspect
                 s
              when 0 then
                 if current_character = ']'.code then
                    s := 2
                 else
                    b.extend(current_character)
                 end
              when 2 then
                 if current_character = ']'.code then
                    s := 3
                 else
                    -- The pending ']' was plain data after all.
                    b.extend(']'.code)
                    b.extend(current_character)
                    s := 0
                 end
              when 3 then
                 if current_character = '>'.code then
                    s := -1
                 elseif current_character = ']'.code then
                    -- "]]]": the oldest bracket is data, the last two may still begin "]]>".
                    b.extend(']'.code)
                 else
                    b.append(once U"]]")
                    b.extend(current_character)
                    s := 0
                 end
              end
              next
           end
        end
        if s = -1 then
           if validator /= Void and then not validator.is_valid_data(b, l, c) then
              callbacks.parse_error(l, c, once "Invalid CDATA")
           else
              if validator /= Void then
                 validator.data(b, l, c)
              end
              callbacks.data(b, l, c)
           end
        end
     end
  read_dtd
        -- Let the DTD parser read the document type declaration from the current buffer and
        -- install what it returns as the validator of the callbacks. A failure of the DTD
        -- parser is reported to the callbacks with the DTD parser's own message.
     require
        is_connected
     local
        dtd: XML_DTD_PARSER
     do
        -- `dtd_parser' is a once function: `dtd' is merely a shorter name for the same object.
        dtd := dtd_parser
        callbacks.set_validator(dtd.parse(buffer))
        skip_blanks
        if dtd.has_error then
           callbacks.parse_error(line, column, dtd.error_message)
        end
     end
  533. feature {}
  534. Parse_again: INTEGER 0
         -- Status meaning "go on": `parse_children' keeps calling `parse_node' while it gets
         -- this value. Being 0, it is also the value of an INTEGER Result never assigned.
  535. Parse_done: INTEGER 1
         -- Status set, for instance, by `parse_node' after a closing tag or at the end of the
         -- input, and by `parse_attribute' when no attribute name is found.
  536. Parse_error: INTEGER -1
         -- Status set when an error was detected.
  537. feature {}
  538. next
         -- Advance by one character. If the current buffer is exhausted it is disconnected
         -- and popped first, so that the advance applies to the buffer then on top of
         -- `open_buffers'.
         -- NOTE(review): if the exhausted buffer was the only open one, `buffer' is Void after
         -- `disconnect_buffer' and the final call fails; callers appear to be expected to test
         -- `end_of_input' beforehand -- confirm.
  539. do
  540. if buffer.end_of_input then
  541. disconnect_buffer
  542. end
  543. buffer.next
  544. end
  end_of_input: BOOLEAN
        -- Is the current buffer exhausted while being the only open buffer?
        -- (An exhausted buffer with other buffers below it is not the end of the input.)
     do
        if buffer.end_of_input then
           Result := open_buffers.count = 1
        end
     end
  549. feature {}
  buffer: UNICODE_PARSER_BUFFER
        -- The character buffer on top of `open_buffers'; Void when no buffer is open.
     do
        if open_buffers.is_empty then
           -- nothing is open: Result stays Void
        else
           Result := open_buffers.top.buffer
        end
     ensure
        definition: open_buffers.is_empty or else Result = open_buffers.top.buffer
     end
  558. make
  559. -- Create a parser that is not connected: `open_buffers' and `urls' are created anew.
         -- Also called by `connect_to' before it connects.
  560. do
  561. create open_buffers.make
  562. create urls.make
  563. end
  564. dtd_parser: XML_DTD_PARSER
         -- The DTD parser used by `read_dtd'. A once function: created on the first call,
         -- the same object is returned afterwards.
  565. once
  566. create Result.make
  567. end
  568. validator: XML_VALIDATOR
         -- The validator currently held by `callbacks'; callers test it against Void before use.
  569. do
  570. Result := callbacks.validator
  571. end
  572. open_buffers: STACK[XML_PARSER_BUFFER]
         -- The buffers being read; the top one provides the current `buffer'.
  573. urls: STACK[URL]
         -- URLs pushed and popped together with `open_buffers' (the class invariant keeps both
         -- counts equal); the top one is the base for relative entity URLs.
  574. closed_buffers: RECYCLING_POOL[UNICODE_PARSER_BUFFER]
         -- Pool from which `connect_buffer' takes a buffer to reuse when one is available.
         -- A once function: the same pool is returned on every call.
  575. once
  576. create Result.make
  577. end
  connect_buffer (a_url, a_face_url: URL; a_entity_name: UNICODE_STRING)
        -- Connect a character buffer (taken from `closed_buffers' when one is available, created
        -- otherwise) to `a_url'. If the connection succeeds, `a_url' is pushed on `urls' and the
        -- buffer on `open_buffers', where it becomes the current one.
        -- With a non-Void `a_face_url' the buffer is registered under that URL and
        -- `a_entity_name', and the callbacks are told that an entity URL was opened; otherwise
        -- it is registered under `a_url' with no entity name.
     require
        a_face_url /= Void implies a_entity_name /= Void
     local
        input: like buffer; entry: XML_PARSER_BUFFER
     do
        debug
           io.put_string(once "connect_buffer(%"")
           io.put_string(a_url.out)
           io.put_string(once "%", ")
           if a_face_url /= Void then
              io.put_character('"')
              io.put_string(a_face_url.out)
              io.put_character('"')
           else
              io.put_string(once "Void")
           end
           io.put_string(once ", ")
           if a_entity_name /= Void then
              io.put_character('"')
              io.put_string(a_entity_name.as_utf8)
              io.put_line(once "%")")
           else
              io.put_line(once "Void)")
           end
        end
        if not closed_buffers.is_empty then
           input := closed_buffers.item
        end
        if input /= Void then
           -- Reuse the recycled buffer.
           input.connect_to(a_url, Void)
        else
           create input.connect_to(a_url, Void)
        end
        if input.is_connected then
           check
              a_url = input.url
           end
           urls.push(a_url)
           if a_face_url /= Void then
              create entry.set(input, a_face_url, a_entity_name)
              open_buffers.push(entry)
              callbacks.open_entity_url(a_entity_name, a_face_url)
           else
              create entry.set(input, a_url, Void)
              open_buffers.push(entry)
           end
        end
     ensure
        a_url.is_connected implies (
           open_buffers.count = old open_buffers.count + 1
           and then a_url = buffer.url
        )
     end
  connect_buffer_entity_value (entity_name, entity_value, entity_url: UNICODE_STRING)
        -- Make `entity_value' the text to read next: it is wrapped in a string input stream and
        -- a buffer reading from that stream is pushed by `connect_buffer'.
        -- When `entity_url' is not Void it is turned into a URL (absolute if it is a valid URL,
        -- otherwise relative to the URL on top of `urls') under which the new buffer is
        -- registered together with `entity_name'.
        -- `entity_url' may be Void: `parse_node' passes Void for the entities resolved by the
        -- callbacks. The buffer is then pushed without any entity URL.
        --| **** TODO: hunt memory leaks
     require
        entity_value /= Void
     local
        sis: STRING_INPUT_STREAM; a_url: URL
     do
        if entity_url /= Void then
           if valid_url(entity_url.as_utf8) then
              if url_pool.is_empty then
                 create a_url.absolute(entity_url.as_utf8)
              else
                 a_url := url_pool.item
                 a_url.absolute(entity_url.as_utf8)
              end
           else
              if url_pool.is_empty then
                 create a_url.relative(urls.top, entity_url.as_utf8)
              else
                 a_url := url_pool.item
                 a_url.relative(urls.top, entity_url.as_utf8)
              end
           end
        end
        create sis.from_string(entity_value.as_utf8)
        -- `a_url' is still Void here when no entity URL was given.
        connect_buffer(sis.url, a_url, entity_name)
     end
  disconnect_buffer
        -- Disconnect the current buffer, then pop it from `open_buffers' and pop `urls' as well.
        -- If the buffer was registered with an entity name, the callbacks are told beforehand
        -- that the entity URL is closed.
     require
        not open_buffers.is_empty
     local
        closing: XML_PARSER_BUFFER
     do
        debug
           io.put_line(once "disconnect_buffer")
        end
        closing := open_buffers.top
        -- `closing.buffer' is what `buffer' returns as long as the stack is not empty.
        closing.buffer.disconnect
        if closing.entity /= Void then
           callbacks.close_entity_url(closing.entity, closing.url)
        end
        open_buffers.pop
        urls.pop
     ensure
        open_buffers.count = old open_buffers.count - 1
        not (old buffer).is_connected
     end
  672. url_pool: RECYCLING_POOL[URL]
         -- Pool from which `connect_buffer_entity_value' takes a URL object to reuse when one
         -- is available. A once function: the same pool is returned on every call.
  673. once
  674. create Result.make
  675. end
  676. invariant
  677. open_buffers /= Void
  678. urls /= Void
  679. open_buffers.count = urls.count
  680. end -- class XML_PARSER
  681. --
  682. -- Copyright (C) 2009-2017: by all the people cited in the AUTHORS file.
  683. --
  684. -- Permission is hereby granted, free of charge, to any person obtaining a copy
  685. -- of this software and associated documentation files (the "Software"), to deal
  686. -- in the Software without restriction, including without limitation the rights
  687. -- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  688. -- copies of the Software, and to permit persons to whom the Software is
  689. -- furnished to do so, subject to the following conditions:
  690. --
  691. -- The above copyright notice and this permission notice shall be included in
  692. -- all copies or substantial portions of the Software.
  693. --
  694. -- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  695. -- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  696. -- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  697. -- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  698. -- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  699. -- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  700. -- THE SOFTWARE.