PageRenderTime 25ms CodeModel.GetById 11ms app.highlight 7ms RepoModel.GetById 2ms app.codeStats 0ms

/src/lib/xml/xml_parser.e

http://github.com/tybor/Liberty
Specman e | 731 lines | 113 code | 8 blank | 610 comment | 11 complexity | b51a3edbbc1f8aade5e640b58597fcd2 MD5 | raw file
  1-- See the Copyright notice at the end of this file.
  2--
  3class XML_PARSER
  4   --
   5   -- The standard Liberty Eiffel XML parser. It is able to parse any well-formed XML document, and also can
  6   -- validate a document containing a DTD.
  7   --
  8   -- Note that this parser is not namespace-aware, nor does it validate documents using a Schema. Classes
  9   -- to that avail must be built on top of this parser.
 10   --
 11   -- See http://www.w3.org/TR/2006/REC-xml11-20060816/
 12   --
 13   -- See also XML_CALLBACKS which is called by this parser when a parsing event occurs.
 14   --
 15insert
 16   XML_PARSER_TOOLS
 17      redefine
 18         next, end_of_input
 19      end
 20   URL_VALIDITY
 21
 22create {ANY}
 23   connect_to, make
 24
 25feature {ANY}
 26   parse (a_callbacks: like callbacks)
 27         -- Parse an XML documents by sending parsing events to the given `callbacks'
 28      require
 29         is_connected
 30      local
 31         pn: like parse_node
 32      do
 33         callbacks := a_callbacks
 34         pn := parse_node(True)
 35         if validator /= Void then
 36            validator.the_end
 37         end
 38      end
 39
 40   connect_to (a_url: URL)
 41      require
 42         not is_connected
 43         a_url.is_connected implies a_url.read
 44      do
 45         make
 46         if not a_url.is_connected and then a_url.can_connect then
 47            a_url.read_only
 48            a_url.connect
 49         end
 50         if a_url.is_connected then
 51            check
 52               a_url.read
 53            end
 54            connect_buffer(a_url, Void, Void)
 55         end
 56      ensure
 57         a_url.is_connected implies (a_url = buffer.url and then is_connected)
 58      end
 59
 60   disconnect
 61      require
 62         is_connected
 63      do
 64         from
 65         until
 66            not is_connected
 67         loop
 68            disconnect_buffer
 69         end
 70      ensure
 71         not is_connected
 72      end
 73
 74   is_connected: BOOLEAN
 75      do
 76         Result := open_buffers /= Void and then not open_buffers.is_empty and then buffer.is_connected
 77      end
 78
 79feature {}
 80   callbacks: XML_CALLBACKS
 81   url: URL
 82
 83   set_url (a_url: like url)
 84      do
 85         url := a_url
 86      end
 87
 88   parse_node (at_root: BOOLEAN): INTEGER
 89         -- True if a node was successfully parsed
 90      require
 91         is_connected
 92      local
 93         name, entity, entity_value, entity_url, data, data_blanks, pi_target, pi_data: UNICODE_STRING
 94         again, done, open, open_close: BOOLEAN; l, c: INTEGER
 95      do
 96         from
 97            again := True
 98         invariant
 99            is_connected
100         until
101            not again or else callbacks.at_error
102         loop
103            again := False
104            skip_blanks
105            l := line
106            c := column
107            from
108               data := once U""
109               data.clear_count
110               data_blanks := once U""
111               data_blanks.clear_count
112            until
113               end_of_input or else skip('<')
114            loop
115               if is_separator(current_character) then
116                  if data.is_empty then
117                     data_blanks.add_last(current_character)
118                  else
119                     data.add_last(current_character)
120                  end
121               else
122                  if not data_blanks.is_empty then
123                     data.append_string(data_blanks)
124                     data_blanks.clear_count
125                  end
126                  if current_character = '&'.code then
127                     l := line
128                     c := column
129                     next
130                     entity := read_identifier
131                     if current_character = ';'.code then
132                        if entity.is_equal(once U"lt") then
133                           data.add_last('<'.code)
134                           next
135                        elseif entity.is_equal(once U"gt") then
136                           data.add_last('>'.code)
137                           next
138                        elseif entity.is_equal(once U"amp") then
139                           data.add_last('&'.code)
140                           next
141                        elseif entity.is_equal(once U"apos") then
142                           data.add_last('%''.code)
143                           next
144                        elseif entity.is_equal(once U"quot") then
145                           data.add_last('"'.code)
146                           next
147                        else
148                           if validator /= Void then
149                              entity_value := validator.entity(entity, l, c)
150                              if entity_value = Void then
151                                 entity_value := callbacks.entity(entity, l, c)
152                                 entity_url := Void
153                              else
154                                 entity_url := validator.entity_url(entity, l, c)
155                              end
156                           else
157                              entity_value := callbacks.entity(entity, l, c)
158                              entity_url := Void
159                           end
160                           if entity_value = Void then
161                              callbacks.parse_error(l, c, once "Unknown entity")
162                              Result := Parse_error
163                           else
164                              next
165                              connect_buffer_entity_value(entity, entity_value, entity_url)
166                           end
167                        end
168                     else
169                        callbacks.parse_error(l, c, once "Missing ';'")
170                        Result := Parse_error
171                     end
172                  else
173                     data.add_last(current_character)
174                     next
175                  end
176               end
177            end
178            if not data.is_empty then
179               if validator /= Void and then not validator.is_valid_data(data, l, c) then
180                  callbacks.parse_error(l, c, once "Invalid data")
181                  Result := Parse_error
182               else
183                  if validator /= Void then
184                     validator.data(data, l, c)
185                  end
186                  callbacks.data(data, l, c)
187               end
188            end
189            if Result /= Parse_error then
190               if end_of_input then
191                  Result := Parse_done
192               else
193                  if skip('/') then
194                     skip_blanks
195                     l := line
196                     c := column
197                     name := read_name
198                     if name = Void then
199                        callbacks.parse_error(l, c, once "Closing tag name expected")
200                        Result := Parse_error
201                     else
202                        skip_blanks
203                        if not skip('>') then
204                           callbacks.parse_error(l, c, once "Missing '>'")
205                           Result := Parse_error
206                        else
207                           if callbacks.current_node.is_equal(name) then
208                              if validator = Void then
209                                 callbacks.close_node(name, l, c)
210                                 Result := Parse_done
211                              elseif not validator.is_valid_close_node(name, l, c) then
212                                 callbacks.parse_error(l, c, once "Invalid closing tag")
213                                 Result := Parse_error
214                              else
215                                 validator.close_node(name, l, c)
216                                 callbacks.close_node(name, l, c)
217                                 Result := Parse_done
218                              end
219                           else
220                              callbacks.parse_error(l, c, once "Unexpected closing tag")
221                              Result := Parse_error
222                           end
223                        end
224                     end
225                  elseif skip('!') then
226                     if skip2('-', '-') then
227                        from
228                        until
229                           skip2('-', '-') and then skip('>')
230                        loop
231                           next
232                        end
233                        again := True
234                     elseif skip_word(once "[CDATA[") then
235                        read_cdata
236                        again := True
237                     elseif at_root and then skip_word(once "DOCTYPE") then
238                        read_dtd
239                        again := True
240                     else
241                        callbacks.parse_error(l, c, once "Syntax error")
242                        Result := Parse_error
243                     end
244                  elseif skip('?') then
245                     skip_blanks
246                     if skip_word(once "xml") then
247                        from
248                        until
249                           done
250                        loop
251                           skip_blanks
252                           if skip('?') then
253                              if not skip('>') then
254                                 callbacks.parse_error(l, c, once "Missing '>'")
255                                 Result := Parse_error
256                              else
257                                 done := True
258                              end
259                           else
260                              Result := parse_attribute(buffer)
261                              done := Result = Parse_done
262                           end
263                        end
264                        if Result /= Parse_error then
265                           if at_root then
266                              callbacks.xml_header(l, c)
267                           else
268                              -- ignored!! (valid xml file included via an entity?)
269                           end
270                           again := True
271                        end
272                     else
273                        pi_target := read_identifier
274                        pi_data := once U""
275                        pi_data.clear_count
276                        from
277                        until
278                           done
279                        loop
280                           if skip('?') then
281                              if skip('>') then
282                                 done := True
283                              else
284                                 pi_data.extend('?'.code)
285                              end
286                           else
287                              pi_data.extend(current_character)
288                              next
289                           end
290                           if end_of_input then
291                              done := True
292                           end
293                        end
294                        if not end_of_input then
295                           callbacks.processing_instruction(pi_target, pi_data)
296                           Result := Parse_done
297                        else
298                           Result := Parse_error
299                        end
300                     end
301                  else
302                     skip_blanks
303                     l := line
304                     c := column
305                     name := read_name
306                     if name = Void then
307                        callbacks.parse_error(l, c, once "Opening tag name expected")
308                        Result := Parse_error
309                     else
310                        skip_blanks
311                        if skip('>') then
312                           if validator /= Void and then not validator.is_valid_open_node(name, l, c) then
313                              callbacks.parse_error(l, c, once "Invalid opening tag")
314                              Result := Parse_error
315                           else
316                              if validator /= Void then
317                                 validator.open_node(name, l, c)
318                              end
319                              callbacks.open_node(name, l, c)
320                           end
321                           Result := parse_children
322                        elseif skip2('/', '>') then
323                           if validator /= Void and then not validator.is_valid_open_close_node(name, l, c) then
324                              callbacks.parse_error(l, c, once "Invalid empty tag")
325                              Result := Parse_error
326                           else
327                              if validator /= Void then
328                                 validator.open_close_node(name, l, c)
329                              end
330                              callbacks.open_close_node(name, l, c)
331                           end
332                           Result := Parse_again
333                        else
334                           from
335                              Result := parse_attribute(Void)
336                              if skip('>') then
337                                 done := True
338                                 open := True
339                                 open_close := False
340                              elseif skip2('/', '>') then
341                                 done := True
342                                 open := False
343                                 open_close := True
344                              else
345                                 done := Result /= Parse_again
346                                 open := False
347                                 open_close := False
348                              end
349                           until
350                              done
351                           loop
352                              Result := parse_attribute(Void)
353                              if skip('>') then
354                                 done := True
355                                 open := True
356                                 open_close := False
357                              elseif skip2('/', '>') then
358                                 done := True
359                                 open := False
360                                 open_close := True
361                              else
362                                 done := Result /= Parse_again
363                                 open := False
364                                 open_close := False
365                              end
366                           end
367                           if Result /= Parse_error then
368                              if open then
369                                 if validator /= Void and then not validator.is_valid_open_node(name, l, c) then
370                                    callbacks.parse_error(l, c, once "Invalid opening tag")
371                                    Result := Parse_error
372                                 else
373                                    if validator /= Void then
374                                       validator.open_node(name, l, c)
375                                    end
376                                    callbacks.open_node(name, l, c)
377                                    Result := parse_children
378                                 end
379                              elseif open_close then
380                                 if validator /= Void and then not validator.is_valid_open_close_node(name, l, c) then
381                                    callbacks.parse_error(l, c, once "Invalid empty tag")
382                                    Result := Parse_error
383                                 else
384                                    if validator /= Void then
385                                       validator.open_close_node(name, l, c)
386                                    end
387                                    callbacks.open_close_node(name, l, c)
388                                 end
389                              end
390                           end
391                        end
392                     end
393                  end
394               end
395            end
396         end
397         if callbacks.at_error then
398            Result := Parse_error
399         end
400      ensure
401         (
402         <<Parse_again, Parse_done, Parse_error>>).has(Result)
403      end
404
405   read_name: UNICODE_STRING
406      local
407         name: UNICODE_STRING
408      do
409         name := read_identifier
410         if name /= Void then
411            Result := once U""
412            Result.copy(name)
413         end
414      end
415
416   parse_attribute (a_buffer: UNICODE_PARSER_BUFFER): INTEGER
417         -- if `a_buffer' if not Void and the attribute is "encoding", set the buffer's encoding.
418      local
419         a, an_attribute, value: UNICODE_STRING; l, c: INTEGER; sa, sv: STRING
420      do
421         skip_blanks
422         l := line
423         c := column
424         a := read_identifier
425         if a = Void then
426            Result := Parse_done
427         else
428            skip_blanks
429            if not skip('=') then
430               callbacks.parse_error(l, c, once "Missing '='")
431               Result := Parse_error
432            else
433               skip_blanks
434               an_attribute := once U""
435               an_attribute.copy(a)
436               debug
437                  sa := an_attribute.as_utf8
438               end
439               value := read_string
440               if value = Void then
441                  callbacks.parse_error(l, c, once "Value expected")
442                  Result := Parse_error
443               else
444                  if buffer /= Void then
445                     sa := once ""
446                     sa.clear_count
447                     an_attribute.utf8_encode_in(sa)
448                     inspect
449                        sa
450                     when "encoding" then
451                        sv := once ""
452                        value.utf8_encode_in(sv)
453                        buffer.set_encoding(sv)
454                     else
455                     end
456                  end
457                  if validator /= Void then
458                     validator.with_attribute(an_attribute, value, l, c)
459                  end
460                  callbacks.with_attribute(an_attribute, value, l, c)
461                  skip_blanks
462               end
463            end
464         end
465      end
466
467   parse_children: INTEGER
468      do
469         from
470            Result := parse_node(False)
471         until
472            Result /= Parse_again or else callbacks.at_error
473         loop
474            Result := parse_node(False)
475         end
476         if callbacks.at_error then
477            Result := Parse_error
478         elseif Result /= Parse_error then
479            Result := Parse_again
480         end
481      ensure
482         (<<Parse_again, Parse_done, Parse_error>>).has(Result)
483      end
484
485   read_cdata
486      local
487         l, c, s: INTEGER; b: UNICODE_STRING
488      do
489         l := line
490         c := column
491         from
492            b := once U""
493            b.clear_count
494         until
495            s < 0
496         loop
497            inspect
498               s
499            when 0 then
500               if current_character = ']'.code then
501                  s := 2
502               else
503                  b.extend('%N'.code)
504                  b.extend(current_character)
505                  s := 0
506               end
507            when 2 then
508               if current_character = ']'.code then
509                  s := 3
510               else
511                  b.extend(']'.code)
512                  b.extend(current_character)
513                  s := 0
514               end
515            when 3 then
516               if current_character = '>'.code then
517                  s := -1
518               else
519                  b.append(once U"]]")
520                  b.extend(current_character)
521                  s := 0
522               end
523            end
524            next
525         end
526         if validator /= Void and then not validator.is_valid_data(b, l, c) then
527            callbacks.parse_error(l, c, once "Invalid CDATA")
528         else
529            if validator /= Void then
530               validator.data(b, l, c)
531            end
532            callbacks.data(b, l, c)
533         end
534      end
535
536   read_dtd
537      require
538         is_connected
539      do
540         callbacks.set_validator(dtd_parser.parse(buffer))
541         skip_blanks
542         if dtd_parser.has_error then
543            callbacks.parse_error(line, column, dtd_parser.error_message)
544         end
545      end
546
547feature {}
 548   Parse_again: INTEGER 0 -- status: keep parsing (e.g. after an empty tag, and from `parse_children'); 0 is also the default INTEGER Result
 549
 550   Parse_done: INTEGER 1 -- status: nothing more at this level (end of input, matched closing tag, no attribute name found)
 551
 552   Parse_error: INTEGER -1 -- status: a parse error was met
553
554feature {}
555   next
556      do
557         if buffer.end_of_input then
558            disconnect_buffer
559         end
560         buffer.next
561      end
562
563   end_of_input: BOOLEAN
564      do
565         Result := buffer.end_of_input and then open_buffers.count = 1
566      end
567
568feature {}
569   buffer: UNICODE_PARSER_BUFFER
570      do
571         if not open_buffers.is_empty then
572            Result := open_buffers.top.buffer
573         end
574      ensure
575         definition: open_buffers.is_empty or else Result = open_buffers.top.buffer
576      end
577
578   make
579         -- Create a not connected parser
580      do
581         create open_buffers.make
582         create urls.make
583      end
584
585   dtd_parser: XML_DTD_PARSER
586      once
587         create Result.make
588      end
589
590   validator: XML_VALIDATOR
591      do
592         Result := callbacks.validator
593      end
594
595   open_buffers: STACK[XML_PARSER_BUFFER]
596   urls: STACK[URL]
597
598   closed_buffers: RECYCLING_POOL[UNICODE_PARSER_BUFFER]
599      once
600         create Result.make
601      end
602
603   connect_buffer (a_url, a_face_url: URL; a_entity_name: UNICODE_STRING)
604      require
605         a_face_url /= Void implies a_entity_name /= Void
606      local
607         buf: like buffer
608      do
609          debug
610              io.put_string(once "connect_buffer(%"")
611              io.put_string(a_url.out)
612              io.put_string(once "%", ")
613              if a_face_url = Void then
614                  io.put_string(once "Void")
615              else
616                  io.put_character('"')
617                  io.put_string(a_face_url.out)
618                  io.put_character('"')
619              end
620              io.put_string(once ", ")
621              if a_entity_name = Void then
622                  io.put_line(once "Void)")
623              else
624                  io.put_character('"')
625                  io.put_string(a_entity_name.as_utf8)
626                  io.put_line(once "%")")
627              end
628          end
629
630         if not closed_buffers.is_empty then
631            buf := closed_buffers.item
632         end
633         if buf = Void then
634            create buf.connect_to(a_url, Void)
635         else
636            buf.connect_to(a_url, Void)
637         end
638         if buf.is_connected then
639            check
640               a_url = buf.url
641            end
642            urls.push(a_url)
643            if a_face_url = Void then
644               open_buffers.push(create {XML_PARSER_BUFFER}.set(buf, a_url, Void))
645            else
646               open_buffers.push(create {XML_PARSER_BUFFER}.set(buf, a_face_url, a_entity_name))
647               callbacks.open_entity_url(a_entity_name, a_face_url)
648            end
649         end
650      ensure
651         a_url.is_connected implies (
652            open_buffers.count = old open_buffers.count + 1
653            and then a_url = buffer.url
654         )
655      end
656
657   connect_buffer_entity_value (entity_name, entity_value, entity_url: UNICODE_STRING)
658         --| **** TODO: hunt memory leaks
659      require
660         entity_value /= Void
661      local
662         sis: STRING_INPUT_STREAM; a_url: URL
663      do
664         if valid_url(entity_url.as_utf8) then
665            if url_pool.is_empty then
666               create a_url.absolute(entity_url.as_utf8)
667            else
668               a_url := url_pool.item
669               a_url.absolute(entity_url.as_utf8)
670            end
671         else
672            if url_pool.is_empty then
673               create a_url.relative(urls.top, entity_url.as_utf8)
674            else
675               a_url := url_pool.item
676               a_url.relative(urls.top, entity_url.as_utf8)
677            end
678         end
679         create sis.from_string(entity_value.as_utf8)
680         connect_buffer(sis.url, a_url, entity_name)
681      end
682
683   disconnect_buffer
684      require
685         not open_buffers.is_empty
686      do
687          debug
688            io.put_line(once "disconnect_buffer")
689          end
690         buffer.disconnect
691         if open_buffers.top.entity /= Void then
692            callbacks.close_entity_url(open_buffers.top.entity, open_buffers.top.url)
693         end
694         open_buffers.pop
695         urls.pop
696      ensure
697         open_buffers.count = old open_buffers.count - 1
698         not (old buffer).is_connected
699      end
700
701   url_pool: RECYCLING_POOL[URL]
702      once
703         create Result.make
704      end
705
706invariant
707   open_buffers /= Void
708   urls /= Void
709   open_buffers.count = urls.count
710
711end -- class XML_PARSER
712--
713-- Copyright (C) 2009-2017: by all the people cited in the AUTHORS file.
714--
715-- Permission is hereby granted, free of charge, to any person obtaining a copy
716-- of this software and associated documentation files (the "Software"), to deal
717-- in the Software without restriction, including without limitation the rights
718-- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
719-- copies of the Software, and to permit persons to whom the Software is
720-- furnished to do so, subject to the following conditions:
721--
722-- The above copyright notice and this permission notice shall be included in
723-- all copies or substantial portions of the Software.
724--
725-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
726-- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
727-- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
728-- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
729-- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
730-- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
731-- THE SOFTWARE.