/src/lib/xml/xml_parser.e
Specman e | 731 lines | 113 code | 8 blank | 610 comment | 11 complexity | b51a3edbbc1f8aade5e640b58597fcd2 MD5 | raw file
-- See the Copyright notice at the end of this file.
--
class XML_PARSER
   --
   -- The standard Liberty Eiffel XML parser. It is able to parse any well-formed XML document, and also can
   -- validate a document containing a DTD.
   --
   -- Note that this parser is not namespace-aware, nor does it validate documents using a Schema. Classes
   -- to that avail must be built on top of this parser.
   --
   -- See http://www.w3.org/TR/2006/REC-xml11-20060816/
   --
   -- See also XML_CALLBACKS which is called by this parser when a parsing event occurs.
   --

insert
   XML_PARSER_TOOLS
      redefine
         next, end_of_input
      end
   URL_VALIDITY

create {ANY}
   connect_to, make

feature {ANY}
   parse (a_callbacks: like callbacks)
         -- Parse an XML document by sending parsing events to the given `callbacks'.
         -- When a validator is attached to the callbacks, its `the_end' is called once parsing returns.
      require
         is_connected
      local
         pn: like parse_node
      do
         callbacks := a_callbacks
         pn := parse_node(True)
         if validator /= Void then
            validator.the_end
         end
      end

   connect_to (a_url: URL)
         -- (Re)initialize the parser and attach it to `a_url'. If `a_url' is not connected yet but can
         -- connect, `read_only' is requested and the URL is connected first. Nothing is attached when
         -- `a_url' is still not connected afterwards.
      require
         not is_connected
         a_url.is_connected implies a_url.read
      do
         make
         if not a_url.is_connected and then a_url.can_connect then
            a_url.read_only
            a_url.connect
         end
         if a_url.is_connected then
            check
               a_url.read
            end
            connect_buffer(a_url, Void, Void)
         end
      ensure
         a_url.is_connected implies (a_url = buffer.url and then is_connected)
      end

   disconnect
         -- Disconnect every open buffer, innermost first, until the parser is not connected any more.
      require
         is_connected
      do
         from
         until
            not is_connected
         loop
            disconnect_buffer
         end
      ensure
         not is_connected
      end

   is_connected: BOOLEAN
         -- True when at least one buffer is open and the topmost one is connected.
      do
         Result := open_buffers /= Void and then not open_buffers.is_empty and then buffer.is_connected
      end

feature {}
   callbacks: XML_CALLBACKS
         -- The receiver of the parsing events; set by `parse'.

   url: URL

   set_url (a_url: like url)
      do
         url := a_url
      end

   parse_node (at_root: BOOLEAN): INTEGER
         -- Parse the text and the markup found at the current position and send the matching events to
         -- `callbacks' (and to the `validator' when there is one).
         --
         -- `at_root' is True only for the call made by `parse'; a DOCTYPE declaration and the `xml_header'
         -- event are only honoured in that case.
         --
         -- The result is one of:
         --  * `Parse_again': an element (with or without children) was handled;
         --  * `Parse_done': a closing tag, a processing instruction or the end of the input was reached;
         --  * `Parse_error': an error was detected (always the case when `callbacks.at_error').
         --
         -- Comments, CDATA sections, the DOCTYPE and the XML header do not end the call: the feature loops
         -- (`again') and looks at what follows them.
      require
         is_connected
      local
         name, entity, entity_value, entity_url, data, data_blanks, pi_target, pi_data: UNICODE_STRING
         again, done, open, open_close: BOOLEAN; l, c: INTEGER
      do
         from
            again := True
         invariant
            is_connected
         until
            not again or else callbacks.at_error
         loop
            again := False
            skip_blanks
            l := line
            c := column
            -- Step 1: gather the character data up to the next '<' (which is consumed by `skip').
            from
               data := once U""
               data.clear_count
               data_blanks := once U""
               data_blanks.clear_count
            until
               end_of_input or else skip('<')
            loop
               if is_separator(current_character) then
                  -- Blanks met while `data' is still empty are kept aside; they are only added to `data'
                  -- if some non-blank character follows.
                  if data.is_empty then
                     data_blanks.add_last(current_character)
                  else
                     data.add_last(current_character)
                  end
                  -- Advance past the separator: nothing else in this branch moves the input forward.
                  next
               else
                  if not data_blanks.is_empty then
                     data.append_string(data_blanks)
                     data_blanks.clear_count
                  end
                  if current_character = '&'.code then
                     l := line
                     c := column
                     next
                     entity := read_identifier
                     if current_character = ';'.code then
                        if entity.is_equal(once U"lt") then
                           data.add_last('<'.code)
                           next
                        elseif entity.is_equal(once U"gt") then
                           data.add_last('>'.code)
                           next
                        elseif entity.is_equal(once U"amp") then
                           data.add_last('&'.code)
                           next
                        elseif entity.is_equal(once U"apos") then
                           data.add_last('%''.code)
                           next
                        elseif entity.is_equal(once U"quot") then
                           data.add_last('"'.code)
                           next
                        else
                           -- Not a predefined entity: ask the validator first, then the callbacks.
                           -- Only the validator can provide an entity URL.
                           if validator /= Void then
                              entity_value := validator.entity(entity, l, c)
                              if entity_value = Void then
                                 entity_value := callbacks.entity(entity, l, c)
                                 entity_url := Void
                              else
                                 entity_url := validator.entity_url(entity, l, c)
                              end
                           else
                              entity_value := callbacks.entity(entity, l, c)
                              entity_url := Void
                           end
                           if entity_value = Void then
                              callbacks.parse_error(l, c, once "Unknown entity")
                              Result := Parse_error
                           else
                              -- Skip the ';' then read the entity value as a nested input buffer.
                              next
                              connect_buffer_entity_value(entity, entity_value, entity_url)
                           end
                        end
                     else
                        callbacks.parse_error(l, c, once "Missing ';'")
                        Result := Parse_error
                     end
                  else
                     data.add_last(current_character)
                     next
                  end
               end
            end
            -- Step 2: deliver the gathered character data, if any.
            if not data.is_empty then
               if validator /= Void and then not validator.is_valid_data(data, l, c) then
                  callbacks.parse_error(l, c, once "Invalid data")
                  Result := Parse_error
               else
                  if validator /= Void then
                     validator.data(data, l, c)
                  end
                  callbacks.data(data, l, c)
               end
            end
            -- Step 3: handle the markup that follows the '<'.
            if Result /= Parse_error then
               if end_of_input then
                  Result := Parse_done
               elseif skip('/') then
                  -- Closing tag
                  skip_blanks
                  l := line
                  c := column
                  name := read_name
                  if name = Void then
                     callbacks.parse_error(l, c, once "Closing tag name expected")
                     Result := Parse_error
                  else
                     skip_blanks
                     if not skip('>') then
                        callbacks.parse_error(l, c, once "Missing '>'")
                        Result := Parse_error
                     elseif not callbacks.current_node.is_equal(name) then
                        callbacks.parse_error(l, c, once "Unexpected closing tag")
                        Result := Parse_error
                     elseif validator = Void then
                        callbacks.close_node(name, l, c)
                        Result := Parse_done
                     elseif not validator.is_valid_close_node(name, l, c) then
                        callbacks.parse_error(l, c, once "Invalid closing tag")
                        Result := Parse_error
                     else
                        validator.close_node(name, l, c)
                        callbacks.close_node(name, l, c)
                        Result := Parse_done
                     end
                  end
               elseif skip('!') then
                  if skip2('-', '-') then
                     -- Comment: skipped up to the closing "-->". The end of input is checked so that
                     -- an unterminated comment is reported instead of reading past the last buffer.
                     from
                        done := False
                     until
                        done
                     loop
                        if skip2('-', '-') and then skip('>') then
                           done := True
                           again := True
                        elseif end_of_input then
                           callbacks.parse_error(l, c, once "Unterminated comment")
                           Result := Parse_error
                           done := True
                        else
                           next
                        end
                     end
                  elseif skip_word(once "[CDATA[") then
                     read_cdata
                     again := True
                  elseif at_root and then skip_word(once "DOCTYPE") then
                     read_dtd
                     again := True
                  else
                     callbacks.parse_error(l, c, once "Syntax error")
                     Result := Parse_error
                  end
               elseif skip('?') then
                  skip_blanks
                  if skip_word(once "xml") then
                     -- XML header: a list of attributes closed by "?>".
                     -- `done' must be reset: it may still be True from a previous iteration of the
                     -- enclosing loop.
                     from
                        done := False
                     until
                        done
                     loop
                        skip_blanks
                        if skip('?') then
                           if not skip('>') then
                              callbacks.parse_error(l, c, once "Missing '>'")
                              Result := Parse_error
                           end
                           done := True
                        else
                           -- The current buffer is given so that an "encoding" attribute is applied to it.
                           Result := parse_attribute(buffer)
                           -- Stop on error as well as when no attribute is left.
                           done := Result /= Parse_again
                        end
                     end
                     if Result /= Parse_error then
                        if at_root then
                           callbacks.xml_header(l, c)
                        else
                           -- ignored!! (valid xml file included via an entity?)
                        end
                        again := True
                     end
                  else
                     -- Processing instruction: a target followed by raw data up to "?>".
                     pi_target := read_identifier
                     pi_data := once U""
                     pi_data.clear_count
                     from
                        done := False
                     until
                        done or else end_of_input
                     loop
                        if skip('?') then
                           if skip('>') then
                              done := True
                           else
                              pi_data.extend('?'.code)
                           end
                        else
                           pi_data.extend(current_character)
                           next
                        end
                     end
                     -- `done' (not `end_of_input') tells whether "?>" was found: a processing
                     -- instruction may legitimately be the very last thing of the input.
                     if done then
                        callbacks.processing_instruction(pi_target, pi_data)
                        -- NOTE(review): a processing instruction ends this call with `Parse_done',
                        -- unlike comments which continue with `again' -- confirm this is intended.
                        Result := Parse_done
                     else
                        callbacks.parse_error(l, c, once "Unterminated processing instruction")
                        Result := Parse_error
                     end
                  end
               else
                  -- Opening tag, or empty-element tag
                  skip_blanks
                  l := line
                  c := column
                  name := read_name
                  if name = Void then
                     callbacks.parse_error(l, c, once "Opening tag name expected")
                     Result := Parse_error
                  else
                     skip_blanks
                     if skip('>') then
                        -- <name>
                        if validator /= Void and then not validator.is_valid_open_node(name, l, c) then
                           callbacks.parse_error(l, c, once "Invalid opening tag")
                           Result := Parse_error
                        else
                           if validator /= Void then
                              validator.open_node(name, l, c)
                           end
                           callbacks.open_node(name, l, c)
                           Result := parse_children
                        end
                     elseif skip2('/', '>') then
                        -- <name/>
                        if validator /= Void and then not validator.is_valid_open_close_node(name, l, c) then
                           callbacks.parse_error(l, c, once "Invalid empty tag")
                           Result := Parse_error
                        else
                           if validator /= Void then
                              validator.open_close_node(name, l, c)
                           end
                           callbacks.open_close_node(name, l, c)
                           Result := Parse_again
                        end
                     else
                        -- <name attr="value" ...> or <name attr="value" .../>
                        -- Void is given to `parse_attribute': element attributes never change the
                        -- encoding of the input.
                        from
                           done := False
                        until
                           done
                        loop
                           Result := parse_attribute(Void)
                           open := False
                           open_close := False
                           if skip('>') then
                              done := True
                              open := True
                           elseif skip2('/', '>') then
                              done := True
                              open_close := True
                           else
                              done := Result /= Parse_again
                           end
                        end
                        if Result /= Parse_error then
                           if open then
                              if validator /= Void and then not validator.is_valid_open_node(name, l, c) then
                                 callbacks.parse_error(l, c, once "Invalid opening tag")
                                 Result := Parse_error
                              else
                                 if validator /= Void then
                                    validator.open_node(name, l, c)
                                 end
                                 callbacks.open_node(name, l, c)
                                 Result := parse_children
                              end
                           elseif open_close then
                              if validator /= Void and then not validator.is_valid_open_close_node(name, l, c) then
                                 callbacks.parse_error(l, c, once "Invalid empty tag")
                                 Result := Parse_error
                              else
                                 if validator /= Void then
                                    validator.open_close_node(name, l, c)
                                 end
                                 callbacks.open_close_node(name, l, c)
                              end
                           else
                              -- No more attribute, and neither '>' nor "/>": the tag is malformed.
                              callbacks.parse_error(l, c, once "Missing '>'")
                              Result := Parse_error
                           end
                        end
                     end
                  end
               end
            end
         end
         if callbacks.at_error then
            Result := Parse_error
         end
      ensure
         (<<Parse_again, Parse_done, Parse_error>>).has(Result)
      end

   read_name: UNICODE_STRING
         -- A copy of the identifier found at the current position, or Void if `read_identifier' found none.
         -- The result is always the same `once' string object: its content is replaced by each
         -- successful call.
      local
         name: UNICODE_STRING
      do
         name := read_identifier
         if name /= Void then
            Result := once U""
            Result.copy(name)
         end
      end

   parse_attribute (a_buffer: UNICODE_PARSER_BUFFER): INTEGER
         -- Parse one `name="value"' attribute and send it to the `validator' (if any) and to `callbacks'.
         --
         -- If `a_buffer' is not Void and the attribute is "encoding", set that buffer's encoding.
         --
         -- The result is `Parse_done' if there is no identifier at the current position, `Parse_error'
         -- if '=' or the value is missing, and `Parse_again' after an attribute was successfully parsed.
      local
         a, an_attribute, value: UNICODE_STRING; l, c: INTEGER; sa, sv: STRING
      do
         skip_blanks
         l := line
         c := column
         a := read_identifier
         if a = Void then
            Result := Parse_done
         else
            skip_blanks
            if not skip('=') then
               callbacks.parse_error(l, c, once "Missing '='")
               Result := Parse_error
            else
               skip_blanks
               -- Keep a private copy of the name before anything else is read from the input.
               an_attribute := once U""
               an_attribute.copy(a)
               debug
                  sa := an_attribute.as_utf8
               end
               value := read_string
               if value = Void then
                  callbacks.parse_error(l, c, once "Value expected")
                  Result := Parse_error
               else
                  if a_buffer /= Void then
                     sa := once ""
                     sa.clear_count
                     an_attribute.utf8_encode_in(sa)
                     inspect
                        sa
                     when "encoding" then
                        sv := once ""
                        -- `sv' is a shared once string: clear it like `sa' before encoding into it.
                        sv.clear_count
                        value.utf8_encode_in(sv)
                        a_buffer.set_encoding(sv)
                     else
                     end
                  end
                  if validator /= Void then
                     validator.with_attribute(an_attribute, value, l, c)
                  end
                  callbacks.with_attribute(an_attribute, value, l, c)
                  skip_blanks
               end
            end
         end
      end

   parse_children: INTEGER
         -- Parse nodes for as long as `parse_node' answers `Parse_again' and no error is flagged.
         -- The result is `Parse_error' on error, `Parse_again' otherwise.
      do
         from
            Result := parse_node(False)
         until
            Result /= Parse_again or else callbacks.at_error
         loop
            Result := parse_node(False)
         end
         if callbacks.at_error then
            Result := Parse_error
         elseif Result /= Parse_error then
            Result := Parse_again
         end
      ensure
         (<<Parse_again, Parse_done, Parse_error>>).has(Result)
      end

   read_cdata
         -- Read the content of a CDATA section (the opening "<![CDATA[" being already consumed) up to
         -- and including the closing "]]>", then send it as data to the `validator' (if any) and to
         -- `callbacks'. An error is reported if the input ends before "]]>".
      local
         l, c, s: INTEGER; b: UNICODE_STRING
      do
         l := line
         c := column
         -- `s' is the state of a small automaton:
         --    0: in the content; 2: one ']' pending; 3: "]]" pending; -1: "]]>" found.
         from
            b := once U""
            b.clear_count
         until
            s < 0 or else end_of_input
         loop
            inspect
               s
            when 0 then
               if current_character = ']'.code then
                  s := 2
               else
                  b.extend(current_character)
               end
            when 2 then
               if current_character = ']'.code then
                  s := 3
               else
                  -- The pending ']' was plain content after all
                  b.extend(']'.code)
                  b.extend(current_character)
                  s := 0
               end
            when 3 then
               if current_character = '>'.code then
                  s := -1
               elseif current_character = ']'.code then
                  -- "]]]": the oldest ']' is content, the last two may still start "]]>"
                  b.extend(']'.code)
               else
                  b.append(once U"]]")
                  b.extend(current_character)
                  s := 0
               end
            end
            next
         end
         if s >= 0 then
            callbacks.parse_error(l, c, once "Unterminated CDATA section")
         elseif validator /= Void and then not validator.is_valid_data(b, l, c) then
            callbacks.parse_error(l, c, once "Invalid CDATA")
         else
            if validator /= Void then
               validator.data(b, l, c)
            end
            callbacks.data(b, l, c)
         end
      end

   read_dtd
         -- Let `dtd_parser' parse the DTD from the current buffer, install what it returns as the
         -- validator of `callbacks', and report the DTD parser's error if it has one.
      require
         is_connected
      do
         callbacks.set_validator(dtd_parser.parse(buffer))
         skip_blanks
         if dtd_parser.has_error then
            callbacks.parse_error(line, column, dtd_parser.error_message)
         end
      end

feature {}
   Parse_again: INTEGER 0

   Parse_done: INTEGER 1

   Parse_error: INTEGER -1

feature {}
   next
         -- Advance by one character. When the current buffer is exhausted it is disconnected first, so
         -- that reading goes on in the buffer below it.
      do
         if buffer.end_of_input then
            disconnect_buffer
         end
         buffer.next
      end

   end_of_input: BOOLEAN
         -- True only when the current buffer is exhausted and it is the only open buffer.
      do
         Result := buffer.end_of_input and then open_buffers.count = 1
      end

feature {}
   buffer: UNICODE_PARSER_BUFFER
         -- The buffer on top of `open_buffers'; Void when no buffer is open.
      do
         if not open_buffers.is_empty then
            Result := open_buffers.top.buffer
         end
      ensure
         definition: open_buffers.is_empty or else Result = open_buffers.top.buffer
      end

   make
         -- Create a not connected parser
      do
         create open_buffers.make
         create urls.make
      end

   dtd_parser: XML_DTD_PARSER
      once
         create Result.make
      end

   validator: XML_VALIDATOR
         -- The validator currently held by `callbacks'.
      do
         Result := callbacks.validator
      end

   open_buffers: STACK[XML_PARSER_BUFFER]
         -- The buffers being read; `connect_buffer' pushes, `disconnect_buffer' pops.

   urls: STACK[URL]
         -- The URL each open buffer was connected to, pushed and popped together with `open_buffers'.

   closed_buffers: RECYCLING_POOL[UNICODE_PARSER_BUFFER]
      once
         create Result.make
      end

   connect_buffer (a_url, a_face_url: URL; a_entity_name: UNICODE_STRING)
         -- Connect a buffer (taken from `closed_buffers' when one is available, created otherwise) to
         -- `a_url' and, if the connection succeeds, push it on `open_buffers'.
         --
         -- When `a_face_url' is not Void the buffer is recorded with `a_face_url' and `a_entity_name'
         -- instead of `a_url', and `callbacks.open_entity_url' is called.
      require
         a_face_url /= Void implies a_entity_name /= Void
      local
         buf: like buffer
      do
         debug
            io.put_string(once "connect_buffer(%"")
            io.put_string(a_url.out)
            io.put_string(once "%", ")
            if a_face_url = Void then
               io.put_string(once "Void")
            else
               io.put_character('"')
               io.put_string(a_face_url.out)
               io.put_character('"')
            end
            io.put_string(once ", ")
            if a_entity_name = Void then
               io.put_line(once "Void)")
            else
               io.put_character('"')
               io.put_string(a_entity_name.as_utf8)
               io.put_line(once "%")")
            end
         end

         if not closed_buffers.is_empty then
            buf := closed_buffers.item
         end
         if buf = Void then
            create buf.connect_to(a_url, Void)
         else
            buf.connect_to(a_url, Void)
         end
         if buf.is_connected then
            check
               a_url = buf.url
            end
            urls.push(a_url)
            if a_face_url = Void then
               open_buffers.push(create {XML_PARSER_BUFFER}.set(buf, a_url, Void))
            else
               open_buffers.push(create {XML_PARSER_BUFFER}.set(buf, a_face_url, a_entity_name))
               callbacks.open_entity_url(a_entity_name, a_face_url)
            end
         end
      ensure
         a_url.is_connected implies (
            open_buffers.count = old open_buffers.count + 1
            and then a_url = buffer.url
         )
      end

   connect_buffer_entity_value (entity_name, entity_value, entity_url: UNICODE_STRING)
         -- Open a nested buffer reading `entity_value'.
         --
         -- `entity_url' may be Void (entity resolved by the callbacks rather than by the validator); the
         -- buffer is then opened without any entity URL. Otherwise it is used as is when `valid_url'
         -- accepts it, or taken relatively to the URL on top of `urls'.
         --
         --| **** TODO: hunt memory leaks
      require
         entity_value /= Void
      local
         sis: STRING_INPUT_STREAM; a_url: URL; url_string: STRING
      do
         if entity_url /= Void then
            url_string := entity_url.as_utf8
            if valid_url(url_string) then
               if url_pool.is_empty then
                  create a_url.absolute(url_string)
               else
                  a_url := url_pool.item
                  a_url.absolute(url_string)
               end
            else
               if url_pool.is_empty then
                  create a_url.relative(urls.top, url_string)
               else
                  a_url := url_pool.item
                  a_url.relative(urls.top, url_string)
               end
            end
         end
         create sis.from_string(entity_value.as_utf8)
         connect_buffer(sis.url, a_url, entity_name)
      end

   disconnect_buffer
         -- Disconnect the topmost buffer and pop it (together with its URL). `callbacks.close_entity_url'
         -- is called first when the buffer was recorded with an entity name.
      require
         not open_buffers.is_empty
      do
         debug
            io.put_line(once "disconnect_buffer")
         end
         buffer.disconnect
         if open_buffers.top.entity /= Void then
            callbacks.close_entity_url(open_buffers.top.entity, open_buffers.top.url)
         end
         open_buffers.pop
         urls.pop
      ensure
         open_buffers.count = old open_buffers.count - 1
         not (old buffer).is_connected
      end

   url_pool: RECYCLING_POOL[URL]
      once
         create Result.make
      end

invariant
   open_buffers /= Void
   urls /= Void
   open_buffers.count = urls.count

end -- class XML_PARSER
--
-- Copyright (C) 2009-2017: by all the people cited in the AUTHORS file.
--
-- Permission is hereby granted, free of charge, to any person obtaining a copy
-- of this software and associated documentation files (the "Software"), to deal
-- in the Software without restriction, including without limitation the rights
-- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-- copies of the Software, and to permit persons to whom the Software is
-- furnished to do so, subject to the following conditions:
--
-- The above copyright notice and this permission notice shall be included in
-- all copies or substantial portions of the Software.
--
-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-- THE SOFTWARE.