PageRenderTime 3381ms CodeModel.GetById 16ms app.highlight 4ms RepoModel.GetById 1ms app.codeStats 3356ms

/src/tools/wrappers-generator/unicode/unicode_parser_buffer.e

http://github.com/tybor/Liberty
Specman e | 380 lines | 316 code | 24 blank | 40 comment | 29 complexity | 77bb4635e1482fd8cbc79af12db08f21 MD5 | raw file
  1-- See the Copyright notice at the end of this file.
  2--
  3class UNICODE_PARSER_BUFFER
  4	--
  5	-- A UTF-8 aware parser buffer for XML parsing
  6	--
  7
  8insert
  9	RECYCLABLE
 10	PLATFORM
 11
 12creation {ANY}
 13	connect_to
 14
 15feature {ANY}
 16	unknown_character: INTEGER is 0x0000fffd
 17
 18feature {ANY}
 19	connect_to (a_url: like url; a_encoding: STRING) is
 20		require
 21			not is_connected
 22		do
 23			if backlog = Void then
 24				create backlog.make(0)
 25			else
 26				check
 27					backlog.is_empty
 28				end
 29			end
 30			url := a_url
 31			if a_encoding /= Void then
 32				set_encoding(a_encoding)
 33			else
 34				size := 0
 35			end
 36			index := -1
 37			line := 1
 38			column := 1
 39			at_error := False
 40			next
 41		end
 42
 43	disconnect is
 44		do
 45			url := Void
 46			backlog.clear_count
 47		ensure
 48			not is_connected
 49		end
 50
 51	set_encoding (a_encoding: STRING) is
 52		require
 53			a_encoding /= Void
 54		local
 55			enc: STRING
 56		do
 57			enc := once ""
 58			enc.copy(a_encoding)
 59			enc.to_upper
 60			inspect
 61				enc
 62			when "UTF-8" then
 63				size := 4
 64			when "UTF-16" then
 65				size := 8
 66				not_yet_implemented
 67			else
 68				size := 1
 69			end
 70		end
 71
 72	url: URL
 73	index, line, column: INTEGER
 74	at_error: BOOLEAN
 75
 76	is_connected: BOOLEAN is
 77		do
 78			Result := url /= Void and then url.is_connected
 79		end
 80
 81	end_of_input: BOOLEAN is
 82		require
 83			is_connected
 84		do
 85			Result := index > backlog.upper and then url.input.end_of_input
 86		end
 87
 88	can_read_character: BOOLEAN is
 89		do
 90			Result := index < backlog.upper or else url.input.can_read_character
 91		end
 92
 93	next is
 94		require
 95			is_connected
 96			not end_of_input
 97			can_read_character
 98			not at_error
 99		local
100			input: INPUT_STREAM; n, w, x, y, z, b1, b2, b3: INTEGER
101		do
102			if index = backlog.upper then
103				input := url.input
104				inspect
105					size
106				when 0 then
107					-- no encoding set, it is an error to have a non-ASCII character
108					input.read_character
109					if not input.end_of_input then
110						if input.last_character.code > 0x7f then
111							at_error := True
112						else
113							backlog.add_last(input.last_character.code)
114						end
115					end
116				when 1 then
117					-- ASCII and 8-bit pages
118					input.read_character
119					if not input.end_of_input then
120						backlog.add_last(input.last_character.code)
121					end
122				when 4 then
123					-- UTF-8
124					input.read_character
125					if not input.end_of_input then
126						n := input.last_character.code
127						if n < 0x00000080 then
128							backlog.add_last(n)
129						else
130							breakpoint
131							input.read_character
132							if not input.end_of_input then
133								if n & 0x000000e0 = 0x000000c0 then
134									-- 2 bytes
135									y := n
136									input.read_character
137									if input.end_of_input then
138										backlog.add_last(unknown_character)
139									else
140										z := input.last_character.code
141										if z & 0x000000c0 = 0x00000080 then
142											y := y & 0x0000001f
143											z := z & 0x0000003f
144											b1 := y |>>> 2
145											b2 := ((y & 0x00000003) |<< 6) & z
146											backlog.add_last(b1 | b2)
147										else
148											backlog.add_last(unknown_character)
149										end
150									end
151								elseif n & 0x000000f0 = 0x000000e0 then
152									-- 3 bytes
153									x := n
154									input.read_character
155									if input.end_of_input then
156										backlog.add_last(unknown_character)
157									else
158										y := input.last_character.code
159										if y & 0x000000c0 = 0x00000080 then
160											input.read_character
161											if input.end_of_input then
162												backlog.add_last(unknown_character)
163											else
164												z := input.last_character.code
165												if z & 0x000000c0 = 0x00000080 then
166													x := x & 0x0000000f
167													y := y & 0x0000003f
168													z := z & 0x0000003f
169													b1 := (x |<< 4) & (y |>>> 2)
170													b2 := ((y & 0x00000003) |<< 6) & z
171													backlog.add_last(b1 | b2)
172												else
173													backlog.add_last(unknown_character)
174												end
175											end
176										else
177											backlog.add_last(unknown_character)
178										end
179									end
180								elseif n & 0x000000f8 = 0x000000f0 then
181									-- 4 bytes
182									w := n
183									input.read_character
184									if input.end_of_input then
185										backlog.add_last(unknown_character)
186									else
187										x := input.last_character.code
188										input.read_character
189										if input.end_of_input then
190											backlog.add_last(unknown_character)
191										else
192											y := input.last_character.code
193											if y & 0x000000c0 = 0x00000080 then
194												input.read_character
195												if input.end_of_input then
196													backlog.add_last(unknown_character)
197												else
198													z := input.last_character.code
199													if z & 0x000000c0 = 0x00000080 then
200														w := w & 0x00000007
201														x := x & 0x0000003f
202														y := y & 0x0000003f
203														z := z & 0x0000003f
204														b1 := (w |<< 2) | (x |>>> 4)
205														b2 := ((x & 0x0000000f) |<< 4) | (y |>> 2)
206														b3 := ((y & 0x00000003) |<< 6) & z
207														backlog.add_last(b1 | b2 | b3)
208													else
209														backlog.add_last(unknown_character)
210													end
211												end
212											else
213												backlog.add_last(unknown_character)
214											end
215										end
216									end
217								else
218									-- invalid code
219									backlog.add_last(unknown_character)
220								end
221							end
222						end
223					end
224				when 8 then
225					-- UTF-16
226					not_yet_implemented
227				end
228			end
229			if not at_error then
230				index := index + 1
231
232				if not end_of_input then
233					if code = '%N'.code then
234						line := line + 1
235						column := 1
236					else
237						column := column + 1
238					end
239				end
240			end
241		ensure
242			at_error or else (not url.input.end_of_input implies backlog.valid_index(index))
243			at_error or else (url.input.end_of_input implies index = backlog.upper + 1)
244		end
245
246	previous is
247		require
248			is_connected
249			index > 0
250		local
251			i: INTEGER
252		do
253			index := index - 1
254			if code = '%N'.code then
255				line := line - 1
256				from
257					column := 0
258					i := index
259				until
260					i = 0 or else backlog.item(i) = '%N'.code
261				loop
262					column := column + 1
263					i := i - 1
264				end
265			else
266				column := column - 1
267			end
268		ensure
269			not end_of_input
270			backlog.valid_index(index)
271		end
272
273	save_position: UNICODE_PARSER_POSITION is
274		do
275			Result.set(index, line, column)
276		end
277
278	restore_position (a_position: UNICODE_PARSER_POSITION) is
279		do
280			index := a_position.index
281			line := a_position.line
282			column := a_position.column
283		end
284
285	set_index (a_index: like index) is
286		require
287			valid_index(a_index)
288		do
289			index := a_index
290		end
291
292	valid_index (a_index: like index): BOOLEAN is
293		do
294			Result := backlog.valid_index(index)
295		end
296
297	character: CHARACTER is
298		require
299			is_connected
300			is_character
301		do
302			Result := code.to_character
303		ensure
304			Result.code = code
305		end
306
307	code: INTEGER is
308		require
309			is_connected
310		do
311			Result := backlog.item(index)
312		end
313
314	is_character: BOOLEAN is
315		require
316			is_connected
317		local
318			i: INTEGER
319		do
320			i := code
321			Result := i >= 0 and then i <= Maximum_character_code
322		end
323
324	append_substring_in (string: UNICODE_STRING; first, last: INTEGER) is
325		require
326			string /= Void
327			first <= last
328			valid_index(first)
329			valid_index(last)
330		local
331			i: INTEGER
332		do
333			from
334				i := first
335			until
336				i > last
337			loop
338				string.extend(backlog.item(i))
339				i := i + 1
340			end
341		end
342
343feature {}
344	backlog: FAST_ARRAY[INTEGER]
345	size: INTEGER
346
347feature {RECYCLING_POOL}
348	recycle is
349		do
350			url := Void
351		end
352
353end -- class UNICODE_PARSER_BUFFER
354--
355-- ------------------------------------------------------------------------------------------------------------
356-- Copyright notice below. Please read.
357--
358-- This file is part of the SmartEiffel standard library.
359-- Copyright(C) 1994-2002: INRIA - LORIA (INRIA Lorraine) - ESIAL U.H.P.       - University of Nancy 1 - FRANCE
360-- Copyright(C) 2003-2006: INRIA - LORIA (INRIA Lorraine) - I.U.T. Charlemagne - University of Nancy 2 - FRANCE
361--
362-- Authors: Dominique COLNET, Philippe RIBET, Cyril ADRIAN, Vincent CROIZIER, Frederic MERIZEN
363--
364-- Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
365-- documentation files (the "Software"), to deal in the Software without restriction, including without
366-- limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
367-- the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
368-- conditions:
369--
370-- The above copyright notice and this permission notice shall be included in all copies or substantial
371-- portions of the Software.
372--
373-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
374-- LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
375-- EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
376-- AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
377-- OR OTHER DEALINGS IN THE SOFTWARE.
378--
379-- http://SmartEiffel.loria.fr - SmartEiffel@loria.fr
380-- ------------------------------------------------------------------------------------------------------------