/tags/matreshka-0.0.7/source/xml/xml-sax-simple_readers-scanner.adb.in

http://github.com/landgraf/matreshka · Autoconf · 993 lines · 835 code · 158 blank · 0 comment · 42 complexity · 028ee1eb6ea9dcbb6252cb7337c8aee3 MD5 · raw file

  1. ------------------------------------------------------------------------------
  2. -- --
  3. -- Matreshka Project --
  4. -- --
  5. -- XML Processor --
  6. -- --
  7. -- Runtime Library Component --
  8. -- --
  9. ------------------------------------------------------------------------------
  10. -- --
  11. -- Copyright Š 2010-2011, Vadim Godunko <vgodunko@gmail.com> --
  12. -- All rights reserved. --
  13. -- --
  14. -- Redistribution and use in source and binary forms, with or without --
  15. -- modification, are permitted provided that the following conditions --
  16. -- are met: --
  17. -- --
  18. -- * Redistributions of source code must retain the above copyright --
  19. -- notice, this list of conditions and the following disclaimer. --
  20. -- --
  21. -- * Redistributions in binary form must reproduce the above copyright --
  22. -- notice, this list of conditions and the following disclaimer in the --
  23. -- documentation and/or other materials provided with the distribution. --
  24. -- --
  25. -- * Neither the name of the Vadim Godunko, IE nor the names of its --
  26. -- contributors may be used to endorse or promote products derived from --
  27. -- this software without specific prior written permission. --
  28. -- --
  29. -- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS --
  30. -- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT --
  31. -- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR --
  32. -- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT --
  33. -- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, --
  34. -- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED --
  35. -- TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR --
  36. -- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF --
  37. -- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING --
  38. -- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS --
  39. -- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. --
  40. -- --
  41. ------------------------------------------------------------------------------
  42. -- $Revision$ $Date$
  43. ------------------------------------------------------------------------------
  44. with Ada.Unchecked_Deallocation;
  45. with League.Strings.Internals;
  46. with Matreshka.Internals.Strings.Operations;
  47. with Matreshka.Internals.Unicode.Characters.Latin;
  48. with Matreshka.Internals.URI_Utilities;
  49. with XML.SAX.Simple_Readers.Callbacks;
  50. with XML.SAX.Simple_Readers.Scanner.Actions;
  51. with XML.SAX.Simple_Readers.Scanner.Tables;
  52. package body XML.SAX.Simple_Readers.Scanner is
  53. use type Interfaces.Unsigned_32;
  54. use Matreshka.Internals.Unicode;
  55. use Matreshka.Internals.Unicode.Characters.Latin;
  56. use Matreshka.Internals.Utf16;
  57. use Matreshka.Internals.XML;
  58. use Matreshka.Internals.XML.Entity_Tables;
  59. use Matreshka.Internals.XML.Symbol_Tables;
  60. use XML.SAX.Simple_Readers.Scanner.Tables;
  61. procedure Set_Whitespace_Matched
  62. (Self : not null access SAX_Simple_Reader'Class);
  63. -- Sets "whitespace matched" flag.
  64. procedure Free is
  65. new Ada.Unchecked_Deallocation
  66. (XML.SAX.Input_Sources.SAX_Input_Source'Class,
  67. XML.SAX.Input_Sources.SAX_Input_Source_Access);
  68. ---------------------------
  69. -- Enter_Start_Condition --
  70. ---------------------------
  71. procedure Enter_Start_Condition
  72. (Self : not null access SAX_Simple_Reader'Class;
  73. State : Interfaces.Unsigned_32) is
  74. begin
  75. Self.Scanner_State.YY_Start_State := State * 2 + 1;
  76. end Enter_Start_Condition;
  77. --------------
  78. -- Finalize --
  79. --------------
  80. procedure Finalize (Self : in out SAX_Simple_Reader'Class) is
  81. begin
  82. while not Self.Scanner_Stack.Is_Empty loop
  83. Free (Self.Scanner_State.Source);
  84. Self.Scanner_State := Self.Scanner_Stack.Last_Element;
  85. Self.Scanner_Stack.Delete_Last;
  86. end loop;
  87. -- Release shared string when scanner's stack is empty, because it is
  88. -- buffer for document entity.
  89. Matreshka.Internals.Strings.Dereference (Self.Scanner_State.Data);
  90. end Finalize;
  91. ----------------
  92. -- Initialize --
  93. ----------------
  94. procedure Initialize (Self : in out SAX_Simple_Reader'Class) is
  95. begin
  96. Self.Scanner_State.Start_Condition_Stack.Append (Tables.DOCUMENT_10);
  97. end Initialize;
  98. -------------------------
  99. -- Pop_Start_Condition --
  100. -------------------------
  101. procedure Pop_Start_Condition
  102. (Self : not null access SAX_Simple_Reader'Class) is
  103. begin
  104. Enter_Start_Condition
  105. (Self, Self.Scanner_State.Start_Condition_Stack.Last_Element);
  106. Self.Scanner_State.Start_Condition_Stack.Delete_Last;
  107. end Pop_Start_Condition;
  108. ------------------------------------
  109. -- Push_And_Enter_Start_Condition --
  110. ------------------------------------
  111. procedure Push_And_Enter_Start_Condition
  112. (Self : not null access SAX_Simple_Reader'Class;
  113. Push : Interfaces.Unsigned_32;
  114. Enter : Interfaces.Unsigned_32) is
  115. begin
  116. Self.Scanner_State.Start_Condition_Stack.Append (Push);
  117. Self.Scanner_State.YY_Start_State := 1 + 2 * Enter;
  118. end Push_And_Enter_Start_Condition;
  119. --------------------------------------------
  120. -- Push_Current_And_Enter_Start_Condition --
  121. --------------------------------------------
  122. procedure Push_Current_And_Enter_Start_Condition
  123. (Self : not null access SAX_Simple_Reader'Class;
  124. Enter : Interfaces.Unsigned_32) is
  125. begin
  126. Self.Scanner_State.Start_Condition_Stack.Append (Start_Condition (Self));
  127. Self.Scanner_State.YY_Start_State := 1 + 2 * Enter;
  128. end Push_Current_And_Enter_Start_Condition;
  129. -----------------
  130. -- Push_Entity --
  131. -----------------
  132. function Push_Entity
  133. (Self : not null access SAX_Simple_Reader'Class;
  134. Entity : Matreshka.Internals.XML.Entity_Identifier;
  135. In_Document_Type : Boolean;
  136. In_Literal : Boolean) return Boolean
  137. is
  138. Source : XML.SAX.Input_Sources.SAX_Input_Source_Access;
  139. Text : Matreshka.Internals.Strings.Shared_String_Access;
  140. Last_Match : Boolean;
  141. Condition : constant Interfaces.Unsigned_32 := Start_Condition (Self);
  142. begin
  143. -- Resolve entity when necessary.
  144. if not Is_Resolved (Self.Entities, Entity) then
  145. Callbacks.Call_Resolve_Entity
  146. (Self.all,
  147. Entity,
  148. League.Strings.Internals.Create
  149. (Public_Id (Self.Entities, Entity)),
  150. League.Strings.Empty_Universal_String,
  151. Matreshka.Internals.URI_Utilities.Construct_System_Id
  152. (League.Strings.Internals.Create
  153. (Base (Self.Entities, Entity)),
  154. League.Strings.Internals.Create
  155. (System_Id (Self.Entities, Entity))),
  156. Source);
  157. Text := Matreshka.Internals.Strings.Shared_Empty'Access;
  158. Last_Match := False;
  159. if not Self.Continue then
  160. Callbacks.Call_Fatal_Error
  161. (Self.all,
  162. League.Strings.To_Universal_String
  163. ("external entity is not resolved"));
  164. return False;
  165. end if;
  166. Set_Is_Resolved (Self.Entities, Entity, True);
  167. case Self.Version is
  168. when XML_1_0 =>
  169. Source.Set_Version (League.Strings.To_Universal_String ("1.0"));
  170. when XML_1_1 =>
  171. Source.Set_Version (League.Strings.To_Universal_String ("1.1"));
  172. end case;
  173. else
  174. Source := null;
  175. Text := Replacement_Text (Self.Entities, Entity);
  176. Last_Match := True;
  177. if Text.Unused = 0 then
  178. -- Replacement text is empty string,
  179. -- [XML 4.4.8] Included as PE
  180. --
  181. -- "Just as with external parsed entities, parameter entities need
  182. -- only be included if validating. When a parameter-entity
  183. -- reference is recognized in the DTD and included, its
  184. -- replacement text MUST be enlarged by the attachment of one
  185. -- leading and one following space (#x20) character; the intent is
  186. -- to constrain the replacement text of parameter entities to
  187. -- contain an integral number of grammatical tokens in the DTD.
  188. -- This behavior MUST NOT apply to parameter entity references
  189. -- within entity values; these are described in 4.4.5 Included in
  190. -- Literal."
  191. --
  192. -- Set Whitespace_Matched flag, it is used only while processing
  193. -- of DTD, so the place where parameter entity declarations are
  194. -- allowed.
  195. Self.Whitespace_Matched := True;
  196. return True;
  197. end if;
  198. end if;
  199. -- [XML 4.4.8] Included as PE
  200. --
  201. -- "Just as with external parsed entities, parameter entities need only
  202. -- be included if validating. When a parameter-entity reference is
  203. -- recognized in the DTD and included, its replacement text MUST be
  204. -- enlarged by the attachment of one leading and one following space
  205. -- (#x20) character; the intent is to constrain the replacement text of
  206. -- parameter entities to contain an integral number of grammatical
  207. -- tokens in the DTD. This behavior MUST NOT apply to parameter entity
  208. -- references within entity values; these are described in 4.4.5
  209. -- Included in Literal."
  210. --
  211. -- Set Whitespace_Matched flag, it is used only while processing of DTD,
  212. -- so the place where parameter entity declarations are allowed.
  213. Self.Whitespace_Matched := True;
  214. Self.Scanner_Stack.Append (Self.Scanner_State);
  215. Self.Scanner_State :=
  216. (Source => Source,
  217. Data => Text,
  218. Entity => Entity,
  219. In_Literal => In_Literal,
  220. Delimiter => 0,
  221. Base =>
  222. Matreshka.Internals.URI_Utilities.Construct_Base
  223. (Self.Scanner_State.Base,
  224. League.Strings.Internals.Create
  225. (System_Id (Self.Entities, Entity))),
  226. others => <>);
  227. if Last_Match then
  228. Self.Scanner_State.YY_Current_Position :=
  229. First_Position (Self.Entities, Entity);
  230. Self.Scanner_State.YY_Current_Index :=
  231. Integer (First_Position (Self.Entities, Entity)) + 1;
  232. if In_Document_Type then
  233. -- External subset processed after processing of internal subset
  234. -- is completed and scanner returns to DOCTYPE_INT start
  235. -- condition; but it must be switched back to
  236. -- DOCTYPE_INTSUBSET_10 or DOCTYPE_INTSUBSET_11 start condition.
  237. case Self.Version is
  238. when XML_1_0 =>
  239. Enter_Start_Condition (Self, DOCTYPE_INTSUBSET_10);
  240. when XML_1_1 =>
  241. Enter_Start_Condition (Self, DOCTYPE_INTSUBSET_11);
  242. end case;
  243. else
  244. if Condition = DOCUMENT_11
  245. and Is_Internal_General_Entity (Self.Entities, Entity)
  246. then
  247. -- Character references are resolved when replacement text of
  248. -- internal general entity is constructed. In XML 1.1 character
  249. -- references can refer to restricted characters which is not
  250. -- valid in text, but valid in replacement text.
  251. Enter_Start_Condition (Self, DOCUMENT_U11);
  252. else
  253. Enter_Start_Condition (Self, Condition);
  254. end if;
  255. end if;
  256. else
  257. -- Reset scanner to INITIAL state to be able to process text
  258. -- declaration at the beginning of the external entity.
  259. if In_Document_Type then
  260. -- External subset processed after processing of internal subset
  261. -- is completed and scanner returns to DOCTYPE_INT start
  262. -- condition; but it must be switched back to
  263. -- DOCTYPE_INTSUBSET_10 or DOCTYPE_INTSUBSET_11 start condition.
  264. case Self.Version is
  265. when XML_1_0 =>
  266. Push_And_Enter_Start_Condition
  267. (Self, DOCTYPE_INTSUBSET_10, INITIAL);
  268. when XML_1_1 =>
  269. Push_And_Enter_Start_Condition
  270. (Self, DOCTYPE_INTSUBSET_11, INITIAL);
  271. end case;
  272. else
  273. Push_And_Enter_Start_Condition (Self, Condition, INITIAL);
  274. end if;
  275. end if;
  276. return True;
  277. end Push_Entity;
  278. ------------------------------
  279. -- Reset_Whitespace_Matched --
  280. ------------------------------
  281. procedure Reset_Whitespace_Matched
  282. (Self : not null access SAX_Simple_Reader'Class) is
  283. begin
  284. Self.Whitespace_Matched := False;
  285. end Reset_Whitespace_Matched;
  286. ---------------------------------------
  287. -- Set_Document_Version_And_Encoding --
  288. ---------------------------------------
  289. procedure Set_Document_Version_And_Encoding
  290. (Self : not null access SAX_Simple_Reader'Class;
  291. Version : XML_Version;
  292. Encoding : League.Strings.Universal_String)
  293. is
  294. Restart : Boolean;
  295. Success : Boolean;
  296. End_Of_Source : Boolean;
  297. begin
  298. Pop_Start_Condition (Self);
  299. if Self.Version /= Version then
  300. -- [XML1.0 2.8]
  301. --
  302. -- "Note: When an XML 1.0 processor encounters a document that
  303. -- specifies a 1.x version number other than '1.0', it will process
  304. -- it as a 1.0 document. This means that an XML 1.0 processor will
  305. -- accept 1.x documents provided they do not use any non-1.0
  306. -- features."
  307. --
  308. -- [XML1.1 4.3.4]
  309. --
  310. -- "Each entity, including the document entity, can be separately
  311. -- declared as XML 1.0 or XML 1.1. The version declaration appearing
  312. -- in the document entity determines the version of the document as a
  313. -- whole. An XML 1.1 document may invoke XML 1.0 external entities,
  314. -- so that otherwise duplicated versions of external entities,
  315. -- particularly DTD external subsets, need not be maintained.
  316. -- However, in such a case the rules of XML 1.1 are applied to the
  317. -- entire document."
  318. --
  319. -- So, XML version of the document can be declared only once, in the
  320. -- XML declaration at the start of the document entity. All other
  321. -- occurrences of version declaration in external subset and
  322. -- external entities are ignored. This allows to simplify code of
  323. -- the version change subprogram.
  324. Self.Version := Version;
  325. case Self.Version is
  326. when XML_1_0 =>
  327. Enter_Start_Condition (Self, Tables.DOCUMENT_10);
  328. when XML_1_1 =>
  329. Enter_Start_Condition (Self, Tables.DOCUMENT_11);
  330. end case;
  331. end if;
  332. case Self.Version is
  333. when XML_1_0 =>
  334. Self.Scanner_State.Source.Reset
  335. (League.Strings.To_Universal_String ("1.0"),
  336. Encoding,
  337. Restart,
  338. Success);
  339. when XML_1_1 =>
  340. Self.Scanner_State.Source.Reset
  341. (League.Strings.To_Universal_String ("1.1"),
  342. Encoding,
  343. Restart,
  344. Success);
  345. end case;
  346. if not Success then
  347. Callbacks.Call_Fatal_Error
  348. (Self.all,
  349. League.Strings.To_Universal_String
  350. ("invalid or unsupported encoding"));
  351. elsif Restart then
  352. Matreshka.Internals.Strings.Operations.Reset
  353. (Self.Scanner_State.Data);
  354. Self.Scanner_State.Source.Next
  355. (Self.Scanner_State.Data, End_Of_Source);
  356. end if;
  357. end Set_Document_Version_And_Encoding;
  358. ----------------------------
  359. -- Set_Whitespace_Matched --
  360. ----------------------------
  361. procedure Set_Whitespace_Matched
  362. (Self : not null access SAX_Simple_Reader'Class) is
  363. begin
  364. Self.Whitespace_Matched := True;
  365. end Set_Whitespace_Matched;
  366. ---------------------
  367. -- Start_Condition --
  368. ---------------------
  369. function Start_Condition
  370. (Self : not null access SAX_Simple_Reader'Class)
  371. return Interfaces.Unsigned_32 is
  372. begin
  373. return (Self.Scanner_State.YY_Start_State - 1) / 2;
  374. end Start_Condition;
  375. ----------------------
  376. -- YY_Move_Backward --
  377. ----------------------
  378. procedure YY_Move_Backward
  379. (Self : not null access SAX_Simple_Reader'Class) is
  380. begin
  381. Self.Scanner_State.YY_Current_Position :=
  382. Self.Scanner_State.YY_Current_Position - 1;
  383. Self.Scanner_State.YY_Current_Index :=
  384. Self.Scanner_State.YY_Current_Index - 1;
  385. Self.Scanner_State.YY_Current_Column :=
  386. Self.Scanner_State.YY_Current_Column - 1;
  387. end YY_Move_Backward;
  388. -------------
  389. -- YY_Text --
  390. -------------
  391. function YY_Text
  392. (Self : not null access SAX_Simple_Reader'Class;
  393. Trim_Left : Natural := 0;
  394. Trim_Right : Natural := 0;
  395. Trim_Whitespace : Boolean := False)
  396. return Matreshka.Internals.Strings.Shared_String_Access
  397. is
  398. -- Trailing and leading character as well as whitespace characters
  399. -- belongs to BMP and don't require expensive UTF-16 decoding.
  400. FP : Utf16_String_Index
  401. := Self.Scanner_State.YY_Base_Position
  402. + Utf16_String_Index (Trim_Left);
  403. FI : Positive := Self.Scanner_State.YY_Base_Index + Trim_Left;
  404. LP : constant Utf16_String_Index
  405. := Self.Scanner_State.YY_Current_Position
  406. - Utf16_String_Index (Trim_Right);
  407. LI : constant Positive
  408. := Self.Scanner_State.YY_Current_Index - Trim_Right;
  409. C : Code_Point;
  410. begin
  411. if Trim_Whitespace then
  412. loop
  413. C := Code_Point (Self.Scanner_State.Data.Value (FP));
  414. exit when
  415. C /= Space
  416. and then C /= Character_Tabulation
  417. and then C /= Carriage_Return
  418. and then C /= Line_Feed;
  419. FP := FP + 1;
  420. FI := FI + 1;
  421. end loop;
  422. end if;
  423. return
  424. Matreshka.Internals.Strings.Operations.Slice
  425. (Self.Scanner_State.Data, FP, LP - FP, LI - FI);
  426. end YY_Text;
  427. -----------
  428. -- YYLex --
  429. -----------
  430. function YYLex
  431. (Self : not null access SAX_Simple_Reader'Class) return Token
  432. is
  433. use type XML.SAX.Input_Sources.SAX_Input_Source_Access;
  434. type YY_End_Of_Buffer_Actions is
  435. (YY_Continue_Scan, -- Continue scanning from the current position.
  436. -- It is used to continue processing after pop
  437. -- up of entity from the scanner's stack.
  438. YY_Report_Entity_End, -- Return end of entity mark.
  439. YY_Restart_Scan, -- Restart scanning from the base position.
  440. YY_Accept_Last_Match, -- Accept last matched action.
  441. YY_End_Of_Chunk, -- End of chunk of data is reached.
  442. YY_End_Of_Input); -- End of input is reached.
  443. YY_Action : Interfaces.Unsigned_32;
  444. YY_C : Interfaces.Unsigned_32;
  445. YY_Current_State : Interfaces.Unsigned_32;
  446. YY_Current_Code : Code_Point;
  447. YY_Last_Accepting_Position : Utf16_String_Index;
  448. YY_Last_Accepting_Index : Positive;
  449. YY_Last_Accepting_State : Interfaces.Unsigned_32;
  450. YY_Last_Accepting_Line : Natural;
  451. YY_Last_Accepting_Column : Natural;
  452. YY_Last_Accepting_Skip_LF : Boolean;
  453. YY_Last_Accepting : Boolean;
  454. YY_Next_Position : Utf16_String_Index;
  455. YY_Next_Index : Positive;
  456. YY_Next_Line : Natural;
  457. YY_Next_Column : Natural;
  458. YY_Next_Skip_LF : Boolean;
  459. YY_Last_Match_Position : Utf16_String_Index;
  460. YY_Last_Match_Index : Positive;
  461. YY_Last_Match_State : Interfaces.Unsigned_32;
  462. YY_Last_Match : Boolean;
  463. YYLVal : YYSType renames Self.YYLVal;
  464. YY_Last : Utf16_String_Index;
  465. End_Of_Source : Boolean;
  466. YY_End_Of_Buffer_Action : YY_End_Of_Buffer_Actions;
  467. YY_Position_Offset : Utf16_String_Index;
  468. YY_Index_Offset : Natural;
  469. YY_Start_Condition : Interfaces.Unsigned_32;
  470. function YY_Text_Internal
  471. (Trim_Left : Natural := 0;
  472. Trim_Right : Natural := 0;
  473. Trim_Whitespace : Boolean := False)
  474. return Matreshka.Internals.Strings.Shared_String_Access;
  475. ----------------------
  476. -- YY_Text_Internal --
  477. ----------------------
  478. function YY_Text_Internal
  479. (Trim_Left : Natural := 0;
  480. Trim_Right : Natural := 0;
  481. Trim_Whitespace : Boolean := False)
  482. return Matreshka.Internals.Strings.Shared_String_Access
  483. is
  484. -- Trailing and leading character as well as whitespace characters
  485. -- belongs to BMP and don't require expensive UTF-16 decoding.
  486. FP : Utf16_String_Index
  487. := Self.Scanner_State.YY_Base_Position
  488. + Utf16_String_Index (Trim_Left);
  489. FI : Positive := Self.Scanner_State.YY_Base_Index + Trim_Left;
  490. LP : constant Utf16_String_Index
  491. := Self.Scanner_State.YY_Current_Position
  492. - Utf16_String_Index (Trim_Right);
  493. LI : constant Positive
  494. := Self.Scanner_State.YY_Current_Index - Trim_Right;
  495. C : Code_Point;
  496. begin
  497. if Trim_Whitespace then
  498. loop
  499. C := Code_Point (Self.Scanner_State.Data.Value (FP));
  500. exit when
  501. C /= Space
  502. and then C /= Character_Tabulation
  503. and then C /= Carriage_Return
  504. and then C /= Line_Feed;
  505. FP := FP + 1;
  506. FI := FI + 1;
  507. end loop;
  508. end if;
  509. return
  510. Matreshka.Internals.Strings.Operations.Slice
  511. (Self.Scanner_State.Data, FP, LP - FP, LI - FI);
  512. end YY_Text_Internal;
  513. begin
  514. loop -- Loops until end-of-data is reached.
  515. Self.Scanner_State.YY_Base_Position :=
  516. Self.Scanner_State.YY_Current_Position;
  517. Self.Scanner_State.YY_Base_Index :=
  518. Self.Scanner_State.YY_Current_Index;
  519. Self.Scanner_State.YY_Base_Line :=
  520. Self.Scanner_State.YY_Current_Line;
  521. Self.Scanner_State.YY_Base_Column :=
  522. Self.Scanner_State.YY_Current_Column;
  523. Self.Scanner_State.YY_Base_Skip_LF :=
  524. Self.Scanner_State.YY_Current_Skip_LF;
  525. YY_Current_State := Self.Scanner_State.YY_Start_State;
  526. YY_Last_Match := False;
  527. YY_Last_Accepting := False;
  528. loop
  529. YY_Next_Position := Self.Scanner_State.YY_Current_Position;
  530. YY_Next_Index := Self.Scanner_State.YY_Current_Index;
  531. YY_Next_Line := Self.Scanner_State.YY_Current_Line;
  532. YY_Next_Column := Self.Scanner_State.YY_Current_Column;
  533. YY_Next_Skip_LF := Self.Scanner_State.YY_Current_Skip_LF;
  534. if YY_Next_Position < Self.Scanner_State.Data.Unused then
  535. Unchecked_Next
  536. (Self.Scanner_State.Data.Value,
  537. YY_Next_Position,
  538. YY_Current_Code);
  539. YY_Next_Index := YY_Next_Index + 1;
  540. -- Track line/column in entity
  541. if YY_Current_Code = Carriage_Return then
  542. -- Start of new line.
  543. YY_Next_Line := YY_Next_Line + 1;
  544. YY_Next_Column := 1;
  545. YY_Next_Skip_LF := True;
  546. elsif YY_Current_Code = Line_Feed then
  547. if YY_Next_Skip_LF then
  548. -- Ignore CR after LF.
  549. YY_Next_Skip_LF := False;
  550. else
  551. YY_Next_Line := YY_Next_Line + 1;
  552. YY_Next_Column := 1;
  553. end if;
  554. else
  555. -- Move to next column.
  556. YY_Next_Column := YY_Next_Column + 1;
  557. YY_Next_Skip_LF := False;
  558. end if;
  559. YY_C :=
  560. YY_EC_Base
  561. (YY_Current_Code / 16#100#) (YY_Current_Code mod 16#100#);
  562. else
  563. -- End of buffer reached.
  564. YY_C := 0;
  565. -- Aflex uses character with code point zero to mark end of
  566. -- buffer character. This character always has YY_EC zero.
  567. YY_Last_Match := YY_Last_Accepting;
  568. if YY_Last_Accepting then
  569. YY_Last_Match_Position := YY_Last_Accepting_Position;
  570. YY_Last_Match_Index := YY_Last_Accepting_Index;
  571. YY_Last_Match_State := YY_Last_Accepting_State;
  572. end if;
  573. end if;
  574. if YY_Accept (YY_Current_State) /= 0 then
  575. -- Accepting state reached, save for possible backtrack.
  576. YY_Last_Accepting_Position :=
  577. Self.Scanner_State.YY_Current_Position;
  578. YY_Last_Accepting_Index :=
  579. Self.Scanner_State.YY_Current_Index;
  580. YY_Last_Accepting_Line :=
  581. Self.Scanner_State.YY_Current_Line;
  582. YY_Last_Accepting_Column :=
  583. Self.Scanner_State.YY_Current_Column;
  584. YY_Last_Accepting_Skip_LF :=
  585. Self.Scanner_State.YY_Current_Skip_LF;
  586. YY_Last_Accepting_State := YY_Current_State;
  587. YY_Last_Accepting := True;
  588. end if;
  589. while YY_Chk (YY_Base (YY_Current_State) + YY_C)
  590. /= YY_Current_State
  591. loop
  592. YY_Current_State := YY_Def (YY_Current_State);
  593. if YY_Current_State >= YY_First_Template then
  594. YY_C := YY_Meta (YY_C);
  595. end if;
  596. end loop;
  597. Self.Scanner_State.YY_Current_Position := YY_Next_Position;
  598. Self.Scanner_State.YY_Current_Index := YY_Next_Index;
  599. Self.Scanner_State.YY_Current_Line := YY_Next_Line;
  600. Self.Scanner_State.YY_Current_Column := YY_Next_Column;
  601. Self.Scanner_State.YY_Current_Skip_LF := YY_Next_Skip_LF;
  602. YY_Current_State := YY_Nxt (YY_Base (YY_Current_State) + YY_C);
  603. exit when YY_Base (YY_Current_State) = YY_Jam_Base;
  604. end loop;
  605. -- Return back to last accepting state.
  606. <<Next_Action>>
  607. YY_Action := YY_Accept (YY_Current_State);
  608. case YY_Action is
  609. when 0 => -- must backtrack
  610. if YY_Last_Accepting then
  611. Self.Scanner_State.YY_Current_Position :=
  612. YY_Last_Accepting_Position;
  613. Self.Scanner_State.YY_Current_Index :=
  614. YY_Last_Accepting_Index;
  615. YY_Current_State :=
  616. YY_Last_Accepting_State;
  617. YY_Last_Accepting := False;
  618. goto Next_Action;
  619. else
  620. raise Program_Error;
  621. end if;
  622. pragma Style_Checks ("M127");
  623. %%
  624. pragma Style_Checks ("M79");
  625. -- when YY_END_OF_BUFFER + INITIAL + 1
  626. -- =>
  627. -- return End_Of_Input;
  628. --
  629. when YY_End_Of_Buffer =>
  630. if Self.Scanner_State.Source /= null then
  631. -- Input source is used to retrieve data.
  632. if Is_Document_Entity
  633. (Self.Entities, Self.Scanner_State.Entity)
  634. and Self.Scanner_State.YY_Base_Position /= 0
  635. then
  636. -- For document entity, remove already scanned data.
  637. -- Construct slice only when we actually need to move
  638. -- data.
  639. Matreshka.Internals.Strings.Operations.Slice
  640. (Self.Scanner_State.Data,
  641. Self.Scanner_State.YY_Base_Position,
  642. Self.Scanner_State.Data.Unused
  643. - Self.Scanner_State.YY_Base_Position,
  644. Self.Scanner_State.Data.Length
  645. - Self.Scanner_State.YY_Base_Index + 1);
  646. YY_Position_Offset := Self.Scanner_State.YY_Base_Position;
  647. YY_Index_Offset :=
  648. Self.Scanner_State.YY_Base_Index - 1;
  649. Self.Scanner_State.YY_Base_Position :=
  650. Self.Scanner_State.YY_Base_Position
  651. - YY_Position_Offset;
  652. Self.Scanner_State.YY_Base_Index :=
  653. Self.Scanner_State.YY_Base_Index - YY_Index_Offset;
  654. Self.Scanner_State.YY_Current_Position :=
  655. Self.Scanner_State.YY_Current_Position
  656. - YY_Position_Offset;
  657. Self.Scanner_State.YY_Current_Index :=
  658. Self.Scanner_State.YY_Current_Index - YY_Index_Offset;
  659. if YY_Last_Match then
  660. YY_Last_Match_Position :=
  661. YY_Last_Match_Position - YY_Position_Offset;
  662. YY_Last_Match_Index :=
  663. YY_Last_Match_Index - YY_Index_Offset;
  664. end if;
  665. end if;
  666. -- Obtain next portion of data from the input source.
  667. YY_Last := Self.Scanner_State.Data.Unused;
  668. Self.Scanner_State.Source.Next
  669. (Self.Scanner_State.Data, End_Of_Source);
  670. YY_End_Of_Buffer_Action := YY_Restart_Scan;
  671. if YY_Last = Self.Scanner_State.Data.Unused then
  672. -- There is no new data retrieved, handle end of source
  673. -- state. It is possible to not reach end of source and
  674. -- retrieve no new data at the same time, for example
  675. -- when source data is mailformed and decoder unable to
  676. -- convert data. The same situtation is possible when
  677. -- some kind of filter is inserted between input source
  678. -- and actual stream (SSL/TLS encription, for example).
  679. if End_Of_Source then
  680. -- Replacement text of the entity is loaded from input
  681. -- source and need to be stored in the entities table,
  682. -- except replacement text of the document entity.
  683. -- Input source can be deallocated.
  684. if not Is_Document_Entity
  685. (Self.Entities, Self.Scanner_State.Entity)
  686. then
  687. Set_Replacement_Text
  688. (Self.Entities,
  689. Self.Scanner_State.Entity,
  690. Self.Scanner_State.Data);
  691. Free (Self.Scanner_State.Source);
  692. -- XXX Input source should not be deallocated, it
  693. -- can be needed later to reread entity when XML
  694. -- version (document entity only) or encoding is
  695. -- changed.
  696. else
  697. Self.Scanner_State.Source := null;
  698. -- Input source of document entity is managed by
  699. -- application.
  700. end if;
  701. elsif Self.Scanner_State.Incremental then
  702. YY_End_Of_Buffer_Action := YY_End_Of_Chunk;
  703. end if;
  704. end if;
  705. else
  706. -- Input source is not used, complete replacement text of
  707. -- the entity is in the scanner's buffer. This covers two
  708. -- cases: (1) entity is internal or predefined entity, and
  709. -- (2) text of the entity is loaded completely.
  710. if Self.Scanner_State.Data.Unused
  711. /= Self.Scanner_State.YY_Base_Position
  712. then
  713. -- Continue processing till end of buffer will be
  714. -- reached.
  715. YY_End_Of_Buffer_Action := YY_Accept_Last_Match;
  716. else
  717. -- Replacement text of the entity is completely scanned,
  718. -- pop scanner's entity stack. When scanner's stack is
  719. -- empty returns End_Of_Input token.
  720. if not Self.Scanner_Stack.Is_Empty then
  721. if Is_Parameter_Entity
  722. (Self.Entities, Self.Scanner_State.Entity)
  723. then
  724. -- For parameter entities start condition need to
  725. -- be propagated to previous state, otherwise
  726. -- scanner can start from the wrong condition.
  727. -- For non-parameter entities it is not needed,
  728. -- because their processing doesn't use stack of
  729. -- start conditions.
  730. YY_Start_Condition := Start_Condition (Self);
  731. -- When entity's replacement text is empty and
  732. -- there are no text declaration, then scanner is
  733. -- in the initial state and actual state must be
  734. -- retrieved from the state stack.
  735. if YY_Start_Condition = INITIAL then
  736. Pop_Start_Condition (Self);
  737. YY_Start_Condition := Start_Condition (Self);
  738. end if;
  739. Free (Self.Scanner_State.Source);
  740. Self.Scanner_State :=
  741. Self.Scanner_Stack.Last_Element;
  742. Self.Scanner_Stack.Delete_Last;
  743. Enter_Start_Condition (Self, YY_Start_Condition);
  744. -- [XML 4.4.8] Included as PE
  745. --
  746. -- "Just as with external parsed entities,
  747. -- parameter entities need only be included if
  748. -- validating. When a parameter-entity reference is
  749. -- recognized in the DTD and included, its
  750. -- replacement text MUST be enlarged by the
  751. -- attachment of one leading and one following
  752. -- space (#x20) character; the intent is to
  753. -- constrain the replacement text of parameter
  754. -- entities to contain an integral number of
  755. -- grammatical tokens in the DTD. This behavior
  756. -- MUST NOT apply to parameter entity references
  757. -- within entity values; these are described in
  758. -- 4.4.5 Included in Literal."
  759. --
  760. -- Set Whitespace_Matched flag, it is used only
  761. -- while processing of DTD, so the place where
  762. -- parameter entity declarations are allowed.
  763. Self.Whitespace_Matched := True;
  764. YY_End_Of_Buffer_Action := YY_Continue_Scan;
  765. elsif Self.In_Document_Content
  766. and not Self.Scanner_State.In_Literal
  767. then
  768. -- For entity references in the document content
  769. -- we need to track start/end of entity.
  770. Free (Self.Scanner_State.Source);
  771. Self.Scanner_State :=
  772. Self.Scanner_Stack.Last_Element;
  773. Self.Scanner_Stack.Delete_Last;
  774. YY_End_Of_Buffer_Action := YY_Report_Entity_End;
  775. else
  776. Free (Self.Scanner_State.Source);
  777. Self.Scanner_State :=
  778. Self.Scanner_Stack.Last_Element;
  779. Self.Scanner_Stack.Delete_Last;
  780. YY_End_Of_Buffer_Action := YY_Continue_Scan;
  781. end if;
  782. else
  783. YY_End_Of_Buffer_Action := YY_End_Of_Input;
  784. end if;
  785. end if;
  786. end if;
  787. case YY_End_Of_Buffer_Action is
  788. when YY_Continue_Scan =>
  789. null;
  790. when YY_Report_Entity_End =>
  791. return Token_Entity_End;
  792. when YY_Restart_Scan | YY_End_Of_Chunk =>
  793. -- Back current position to base position.
  794. Self.Scanner_State.YY_Current_Position :=
  795. Self.Scanner_State.YY_Base_Position;
  796. Self.Scanner_State.YY_Current_Index :=
  797. Self.Scanner_State.YY_Base_Index;
  798. Self.Scanner_State.YY_Current_Line :=
  799. Self.Scanner_State.YY_Base_Line;
  800. Self.Scanner_State.YY_Current_Column :=
  801. Self.Scanner_State.YY_Base_Column;
  802. Self.Scanner_State.YY_Current_Skip_LF :=
  803. Self.Scanner_State.YY_Base_Skip_LF;
  804. if YY_End_Of_Buffer_Action = YY_End_Of_Chunk then
  805. return End_Of_Chunk;
  806. end if;
  807. when YY_Accept_Last_Match =>
  808. -- Replace current position to last matched position and
  809. -- process matched action.
  810. -- XXX: Other cases handle line/column numbers and
  811. -- "skip LF" flag also, should they be handled here?
  812. if YY_Last_Match then
  813. Self.Scanner_State.YY_Current_Position :=
  814. YY_Last_Match_Position;
  815. Self.Scanner_State.YY_Current_Index :=
  816. YY_Last_Match_Index;
  817. YY_Current_State :=
  818. YY_Last_Match_State;
  819. YY_Last_Match := False;
  820. else
  821. raise Program_Error;
  822. end if;
  823. goto Next_Action;
  824. when YY_End_Of_Input =>
  825. return End_Of_Input;
  826. end case;
  827. when others =>
  828. raise Program_Error
  829. with "Unhandled action"
  830. & Interfaces.Unsigned_32'Image (YY_Action)
  831. & " in scanner";
  832. end case;
  833. end loop; -- end of loop waiting for end of file
  834. end YYLex;
  835. end XML.SAX.Simple_Readers.Scanner;