PageRenderTime 40ms CodeModel.GetById 16ms app.highlight 12ms RepoModel.GetById 2ms app.codeStats 0ms

/src/wrappers/glib/partially-implemented/utf8_string.e

http://github.com/tybor/Liberty
Specman e | 2179 lines | 1329 code | 237 blank | 613 comment | 77 complexity | c247625d7eace6f35615ec29190bda03 MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1indexing
   2	description: "Unicode UTF-8 string."
   3	copyright: "[
   4					Copyright (C) 2006 Paolo Redaelli, Glib team
   5					
   6					This library is free software; you can redistribute it and/or
   7					modify it under the terms of the GNU Lesser General Public License
   8					as published by the Free Software Foundation; either version 2.1 of
   9					the License, or (at your option) any later version.
  10					
  11					This library is distributed in the hope that it will be useful, but
  12					WITHOUT ANY WARRANTY; without even the implied warranty of
  13					MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14					Lesser General Public License for more details.
  15
  16					You should have received a copy of the GNU Lesser General Public
  17					License along with this library; if not, write to the Free Software
  18					Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19					02110-1301 USA
  20			]"
  21
  22class UTF8_STRING
  23	-- A string of Unicode characters encoded into UTF-8. 
  24	
  25	-- This particular encoding trades access efficiency for space efficiency:
  26	-- it uses a variable amount of memory to store each character, from 8 to
  27	-- 32 bits, thus making traversal and random access costly; AFAIK random
  28	-- access is O(n), and traversal, while each step is still an O(1)
  29	-- operation, is more complex and costly than traversing a normal STRING.
  30
  31	-- UNICODE_MANIPULATION
  32
  33	-- A number of functions for dealing with Unicode characters and
  34	-- strings. There are analogues of the traditional C functions
  35	-- found in ctype.h character classification and case conversion
  36	-- functions, UTF-8 analogues of some string utility functions,
  37	-- functions to perform normalization, case conversion and
  38	-- collation on UTF-8 strings and finally functions to convert
  39	-- between the UTF-8, UTF-16 and UCS-4 encodings of Unicode.
  40	
  41	-- The implementations of the Unicode functions in GLib are based
  42	-- on the Unicode Character Data tables, which are available from
  43	-- www.unicode.org. GLib 2.8 supports Unicode 4.0, GLib 2.10
  44	-- supports Unicode 4.1, GLib 2.12 supports Unicode 5.0.
  45
  46inherit
  47	HASHABLE
  48		redefine copy
  49		end
  50	COMPARABLE
  51		redefine is_equal, copy, compare, three_way_comparison
  52		end
  53	TRAVERSABLE[UNICODE_CHARACTER]
  54		redefine is_equal, copy
  55		end
  56	RECYCLABLE
  57		redefine is_equal, copy
  58		end
  59	WRAPPER
  60
  61insert 
  62	GUNICODE_EXTERNALS
  63	GUNICODE_MACROS
  64	GMEM_EXTERNALS
  65
  66creation {ANY}
  67	make, copy, make_empty, make_filled, from_utf8
  68
  69feature {ANY}
  70	capacity: NATURAL_32
  71	-- String capacity in bytes
  72	
  73	count: NATURAL_32
  74	-- String length in characters
  75
  76	lower: NATURAL_32 is do Result:=0.to_natural_32 end
  77	-- Minimum index; currently it is 0 to mimic C strings; note that
  78	-- correct coding style shall not rely on the actual value of this
  79	-- feature.
  80
  81	upper: NATURAL_32 is
  82		-- Maximum index, computed as `count' minus one.
  83	do
  84		Result:=count-1.to_natural_32 -- NOTE(review): unsigned subtraction; the value for an empty string (`count' = 0) should be confirmed
  85	end
  86
  87feature {} -- 
  88	bytes_count: NATURAL_32 
  89	-- size in bytes. When you use non-ASCII characters it will be different than count
  90
  91feature {ANY} -- Creation / Modification:
  92	make (needed_capacity: NATURAL_32) is
  93			-- Make the string empty with at least `needed_capacity' bytes of
  94			-- storage; raises `No_more_memory' if the buffer cannot be allocated.
  95		do
  96			if needed_capacity > 0.to_natural_32 then
  97				if capacity < needed_capacity then
  98					handle := g_try_malloc(needed_capacity) -- `handle' is the buffer the other features read
  99					if handle.is_null then raise(No_more_memory)
 100					else capacity := needed_capacity
 101					end
 102				end
 103			end
 104			count := 0; bytes_count := 0 -- an empty string holds neither characters nor bytes
 105		ensure
 106			needed_capacity <= capacity
 107			empty_string: count = 0
 108		end
 109
 110	make_empty is
 111			-- Create an empty string by calling `make' with a requested capacity of 0.
 112		do
 113			make(0)
 114		end
 115
 116	make_filled (a_character: UNICODE_CHARACTER; n: NATURAL_32) is
 117			-- Initialize string with `n' copies of `a_character', which must be a valid character.
 118		require
 119			a_character.is_valid
 120		do
 121			make(n) -- NOTE(review): `make' takes a capacity in bytes while `n' counts characters — confirm multi-byte characters fit
 122			count := n
 123			fill_with(a_character) -- NOTE(review): `fill_with' is declared with an INTEGER argument — confirm the conversion
 124		ensure
 125			count_set: count = n
 126			filled: occurrences(a_character) = count
 127		end
 128
 129	from_string (a_string: STRING) is
 130		-- Create an UTF8 string holding a copy of `a_string'; raises `Non_valid_utf8_string' when `a_string' is not valid UTF-8.
 131	require a_string/=Void
 132	local validated: BOOLEAN; an_end: POINTER
 133	do
 134		validated := g_utf8_validate (a_string.to_external, a_string.count, $an_end).to_boolean
 135		if validated then 
 136			handle := g_memdup(a_string.to_external, a_string.count + 1) -- one extra byte: copy the terminating NUL that `to_external' provides
 137			bytes_count := a_string.count -- the NUL is not part of the content
 138			capacity := bytes_count + 1.to_natural_32 -- the buffer really is one byte larger than the content
 139			count := g_utf8_strlen(handle, bytes_count)
 140		else
 141			raise(Non_valid_utf8_string)
 142		end
 143	end
 144
 145	Non_valid_utf8_string: STRING is "Given string is not UTF8 valid."
 146
 147feature {WRAPPER_HANDLER}
 148	from_pointer (a_pointer: POINTER) is
 149		-- Create an UTF8 string that adopts `a_pointer', a zero-terminated string, without copying it; raises `Non_valid_utf8_string' if validation fails.
 150	local validated: BOOLEAN; an_end: POINTER
 151	do
 152		validated := g_utf8_validate (a_pointer, -1, $an_end).to_boolean -- -1: no explicit length, the string is zero-terminated
 153		if validated then 
 154			handle := a_pointer -- the caller's buffer is used as is
 155			bytes_count := an_end - a_pointer -- NOTE(review): relies on POINTER subtraction yielding a byte distance — confirm
 156			capacity := bytes_count
 157			count := g_utf8_strlen(handle, bytes_count)
 158		else
 159			raise(Non_valid_utf8_string)
 160		end
 161	end
 162
 163feature {ANY} -- Testing:
 164	is_empty: BOOLEAN is
 165			-- Has string length 0?
 166			--
 167			-- See also `count'.
 168		do
 169			Result := count = 0
 170		end
 171
 172	item (i: like lower): UNICODE_CHARACTER is
 173			-- Get unicode at position `i'.
 174			--
 175			-- See also `lower', `upper', `valid_index', `put'.
 176		require valid_index(i)
 177			-- `g_utf8_offset_to_pointer' already yields the address of character `i', so that address is decoded directly.
 178		do
 179			Result.set(g_utf8_get_char(g_utf8_offset_to_pointer(handle,i)))
 180		end
 181
 182	infix "@" (i: like lower): UNICODE_CHARACTER is
 183			-- The infix notation which is actually just a synonym for `item'.
 184			--
 185			-- See also `item', `put'.
 186		require valid_index(i)
 187		do
 188			Result := item(i)
 189		ensure definition: Result = item(i)
 190		end
 191
 192	hash_code: INTEGER is -- Hash folded from the code of every character, in iteration order; never negative.
 193		local i: ITERATOR_ON_UTF8_STRING
 194		do
 195			from i:=get_new_iterator; i.start
 196			until i.is_off
 197			loop
 198				Result := {INTEGER 5} #* Result #+ i.item.code.hash_code -- multiply the running value by 5, then add this character's hash
 199				i.next
 200			end
 201			if Result < 0 then
 202				Result := ~Result -- bitwise complement turns a negative value into a non-negative one
 203			end
 204		end
 205
 206	infix "<" (other: like Current): BOOLEAN is
 207			-- Is `Current' less than `other'? Characters are compared pairwise;
 208			-- when one string is a prefix of the other, the shorter one is less.
 209			-- See also `>', `<=', `>=', `min', `max'.
 210		local
 211			i,j: ITERATOR_ON_UTF8_STRING
 212		do
 213			from
 214				i:=get_new_iterator; i.start
 215				j:=other.get_new_iterator; j.start
 216				-- No length needs to be pre-computed: the iterators signal the end of either string.
 217			until (i.is_off or j.is_off) or else i.item/=j.item
 218			loop i.next; j.next
 219			end
 220			if i.is_off or j.is_off then
 221				Result := count < other.count
 222			else Result := i.item < j.item
 223			end
 224		end
 225
 226	compare, three_way_comparison (other: like Current): INTEGER is
 227		-- Compares Current with `other' using the linguistically correct rules
 228		-- for the current locale. Result is < 0 if Current compares before
 229		-- `other', 0 if they compare equal, > 0 if Current compares after
 230		-- `other'. When sorting a large number of strings, it will be
 231		-- significantly faster to obtain collation keys with `collate_key'
 232		-- and compare the keys when sorting instead of sorting the original
 233		-- strings. Note: in the C language, collation keys are compared with
 234		-- the function strcmp; AFAIK comparing them as Eiffel STRINGs should
 235		-- be equivalent. Paolo 2009-06-22
 236		do
 237			Result :=g_utf8_collate(handle,other.handle)
 238		
 239		end
 240
 241	is_equal (other: like Current): BOOLEAN is
 242			-- Do both strings have the same character sequence?
 243			--
 244			-- See also `same_as'.
 245		local i,j: ITERATOR_ON_UTF8_STRING
 246		do
 247			if Current = other then Result := True
 248			else
 249				if count = other.count then 
 250					-- TODO: Could be improved. If stored in canonical form
 251					-- direct memory comparison could be made. Paolo 2009-06-20
 252					from 
 253						Result := True
 254						i := Current.get_new_iterator; i.start
 255						j :=   other.get_new_iterator; j.start
 256					until i.is_off or else Result=False
 257					loop
 258						check not i.is_off implies not j.is_off end -- equal counts: both iterators run out together
 259						Result := i.item = j.item
 260						i.next; j.next
 261					end
 262				else Result:=False
 263				end
 264			end
 265		end
 266
 267	same_as (other: UNICODE_STRING): BOOLEAN is
 268			-- Case insensitive `is_equal': characters are compared after `to_lower'.
 269		require
 270			other /= Void
 271		local i,j: ITERATOR_ON_UTF8_STRING
 272		do
 273			if count=other.count then
 274				from 
 275					i := Current.get_new_iterator; i.start
 276					j :=   other.get_new_iterator; j.start -- NOTE(review): `other' is a UNICODE_STRING but `j' is an ITERATOR_ON_UTF8_STRING — confirm the types conform
 277					Result := True
 278				until i.is_off or else Result = False
 279				loop
 280					Result := i.item.to_lower = j.item.to_lower
 281					i.next; j.next
 282				end
 283			else Result:=False
 284			end
 285		end
 286
 287	index_of (a_character: like item; a_start_index: like lower): REFERENCE[like lower] is
 288			-- Index of first occurrence of `a_character' at or after `a_start_index'
 289			-- Void if not found.
 290			--
 291			-- See also `reverse_index_of', `first_index_of', `last_index_of', `has'.
 292		require
 293			valid_start_index: a_start_index >= lower and a_start_index <= count
 294			a_character.is_valid
 295		local location: POINTER; integer_result: INTEGER_32
 296		do
 297			-- Scan from the address of character `a_start_index'; the offset of a
 298			-- match is still measured from the start of the string (`handle').
 299			if handle.is_not_null then location := g_utf8_strchr(g_utf8_offset_to_pointer(handle, a_start_index), -1, a_character.code) end
 300			if location.is_not_null then
 301				integer_result := g_utf8_pointer_to_offset(handle,location)
 302				check integer_result >= 0 end -- offset 0 is a legitimate match: indices start at `lower'
 303				create Result.set_item(integer_result.to_natural_32)				
 304			end
 305		ensure
 306			Result /= Void implies item(Result.item) = a_character
 307		end
 308
 309	reverse_index_of (a_character: like item; a_start_index: like lower): REFERENCE[like lower] is
 310			-- Index of first occurrence of `a_character' at or before
 311			-- `a_start_index'; Void if none.
 312			--
 313			-- The search is done in reverse direction, which means from `a_start_index' down
 314			-- to the first character.
 315			--
 316			-- See also `index_of', `last_index_of', `first_index_of'.
 317		require
 318			valid_start_index: a_start_index >= 0 and a_start_index <= count
 319			a_character.is_valid	
 320		local location: POINTER; integer_result: INTEGER_32
 321		do
 322			not_yet_implemented
 323			-- Not correct: the search below ignores `a_start_index' and covers the whole string.
 324			location := g_utf8_strrchr(handle, -1, a_character.code)
 325			if location.is_not_null then
 326				integer_result := g_utf8_pointer_to_offset(handle,location)
 327				check integer_result >= 0 end -- offset 0 is a legitimate match: indices start at `lower'
 328				create Result.set_item(integer_result.to_natural_32)				
 329			end
 330		ensure
 331			Result /= Void implies item(Result.item) = a_character
 332		end
 333
 334	first_index_of (a_character: like item): REFERENCE[like lower] is
 335			-- Index of first occurrence of `a_character'; Void if there is none.
 336			--
 337			-- See also `last_index_of', `index_of', `reverse_index_of'.
 338		require
 339			a_character.is_valid
 340		local location: POINTER; integer_result: INTEGER_32
 341		do
 342			location := g_utf8_strchr(handle, -1, a_character.code)
 343			if location.is_not_null then
 344				integer_result := g_utf8_pointer_to_offset(handle,location)
 345				check integer_result >= 0 end -- offset 0 is a legitimate match: indices start at `lower'
 346				create Result.set_item(integer_result.to_natural_32)				
 347			end
 348		ensure
 349			definition: Result /= Void implies Result.is_equal(index_of(a_character,lower))
 350		end
 351
 352	last_index_of (unicode: like item): REFERENCE[like lower] is
 353			-- Index of last occurrence of `unicode', Void if none. Not implemented yet.
 354			--
 355			-- See also `first_index_of', `reverse_index_of', `index_of'.
 356		do
 357			not_yet_implemented
 358		ensure
 359			definition: Result /= Void implies Result.is_equal(reverse_index_of(unicode, upper)) -- compare the indices, not the reference identities
 360		end
 361
 362	has (a_character: like first): BOOLEAN is
 363			-- True if `a_character' is in the string.
 364			--
 365			-- See also `index_of', `occurrences', `has_substring'.
 366		require
 367			valid_unicode_value: a_character.is_valid
 368		do
 369			Result := first_index_of(a_character) /= Void -- a Void index means "not found"
 370		end
 371
 372	has_substring (other: UNICODE_STRING): BOOLEAN is
 373			-- True if `Current' contains `other'.
 374			--
 375			-- See also `substring_index', `has'.
 376		require
 377			other_not_void: other /= Void
 378		do
 379			Result := substring_index(other, 1) /= 0 -- NOTE(review): presumably `substring_index' yields 0 when nothing is found; it is not defined in the part of the file seen here — confirm
 380		end
 381
 382	occurrences (unicode: INTEGER): INTEGER is
 383			-- Number of times character `unicode' appears in the string.
 384			--
 385			-- See also `remove_all_occurrences', `has'.
 386		require
 387			valid_unicode_value: valid_unicode(unicode)
 388		local
 389			i: INTEGER
 390		do
 391			from
 392				i := index_of(unicode, 1)
 393			until
 394				i = 0
 395			loop
 396				Result := Result + 1
 397				i := index_of(unicode, i + 1) -- resume just after the match; restarting at `i' would find it again forever
 398			end
 399		ensure
 400			Result >= 0
 401		end
 402
 403	has_suffix (s: UNICODE_STRING): BOOLEAN is
 404			-- Does `Current' end with the characters of `s'?
 405			--
 406			-- See also `remove_suffix', `has_prefix', `has_substring'.
 407		require
 408			s /= Void
 409		local
 410			position, shift: INTEGER
 411		do
 412			shift := count - s.count
 413			if shift >= 0 then
 414				from
 415					Result := True; position := lower
 416				until
 417					not Result or else position > s.upper
 418				loop
 419					Result := item(position + shift) = s.item(position); position := position + 1
 420				end
 421			end
 422		end
 423
 424	has_prefix (p: UNICODE_STRING): BOOLEAN is
 425			-- Does `Current' begin with the characters of `p'?
 426		require
 427			p /= Void
 428		local
 429			position: INTEGER
 430		do
 431			if count >= p.count then
 432				from
 433					Result := True; position := lower
 434				until
 435					not Result or else position > p.upper
 436				loop
 437					Result := item(position) = p.item(position); position := position + 1
 438				end
 439			end
 440		end
 441
 442feature {ANY} -- Testing and Conversion:
 443	is_ascii: BOOLEAN is
 444			-- True if every unicode value is in range 0..127.
 445		local
 446			i: INTEGER
 447		do
 448			from
 449				i := count - 1 -- scan from the last position down to 0
 450			until
 451				i < 0 or else storage.item(i) & 0xFF80 /= 0 -- stop at the first unit with a bit set above the low seven
 452			loop
 453				i := i - 1
 454			end
 455			Result := i < 0 -- the scan passed position 0 without stopping early
 456		end
 457
 458	to_utf8: STRING is
 459			-- New string is created, current unicode string is encoded
 460			-- with UTF-8 format.
 461			--
 462			-- See also: `utf8_encode_in' and `as_utf8' to save memory.
 463		do
 464			tmp_buffer.clear_count
 465			utf8_encode_in(tmp_buffer)
 466			Result := tmp_buffer.twin
 467		end
 468
 469	to_string: STRING is
 470		obsolete "Now use `to_utf8' instead (May 2008)."
 471		do
 472			Result := to_utf8
 473		end
 474
 475	as_utf8: STRING is
 476			-- Encode the string in UTF-8. Always returns the same once object.
 477			--
 478			-- See also: `to_utf8', `utf8_encode_in'.
 479		do
 480			Result := once ""
 481			Result.clear_count
 482			utf8_encode_in(Result)
 483		end
 484
 485	as_string: STRING is
 486		obsolete "Now use `as_utf8' instead (May 2008)."
 487		do
 488			Result := as_utf8
 489		end
 490
 491	utf8_encode_in (s: STRING) is
 492			-- Append the string in UTF-8 to `s'.
 493			--
 494			-- See also: `to_utf8', `as_utf8'.
 495		require
 496			s /= Void
 497		local
 498			i: INTEGER; v: INTEGER
 499		do
 500			from
 501				i := 1
 502			until
 503				i > count
 504			loop
 505				v := item(i)
 506				if v < 128 then
 507					s.extend(v.to_character)
 508				elseif v < 2048 then
 509					s.extend((v #// 64 + 192).to_character)
 510					s.extend((v #\\ 64 + 128).to_character)
 511				elseif v < 65536 then
 512					s.extend((v #// 4096 + 224).to_character)
 513					v := v #\\ 4096
 514					s.extend((v #// 64 + 128).to_character)
 515					s.extend((v #\\ 64 + 128).to_character)
 516				else
 517					check
 518						v < 0x00110000
 519					end
 520					s.extend((v #// 0x00040000 + 240).to_character)
 521					v := v #\\ 0x00040000
 522					s.extend((v #// 0x00001000 + 128).to_character)
 523					v := v #\\ 0x00001000
 524					s.extend((v #// 64 + 128).to_character)
 525					s.extend((v #\\ 64 + 128).to_character)
 526				end
 527				i := i + 1
 528			end
 529		end
 530
 531	utf16be_encode_in (s: STRING) is
 532			-- Append the string in UTF-16BE to `s'
 533		require
 534			s /= Void
 535		local
 536			i, k: INTEGER; v: INTEGER_16
 537		do
 538			from
 539			until
 540				i >= count
 541			loop
 542				v := storage.item(i)
 543				s.extend((v |>>> 8).to_character)
 544				s.extend((v & 0x00FF).to_character)
 545				if v & 0xF800 = 0xD800 then
 546					check
 547						low_surrogate_indexes.item(k) = i + 1
 548					end
 549					s.extend((low_surrogate_values.item(k) #// 256 + 220).to_character)
 550					s.extend((low_surrogate_values.item(k) & 0x00FF).to_character)
 551					k := k + 1
 552				end
 553				i := i + 1
 554			end
 555		end
 556
 557	utf8_decode_from (s: STRING): BOOLEAN is
 558			-- Decode `s' as UTF-8 and `extend' Current with each decoded value.
 559			-- Return `False' if any part failed to decode; each failure adds 65533 (U+FFFD) instead.
 560		require
 561			s /= Void
 562		local
 563			i, k, seq_length: INTEGER; v: INTEGER
 564		do
 565			from
 566				Result := True
 567				i := 1
 568			until
 569				i > s.count
 570			loop
 571				v := s.item(i).code
 572				i := i + 1
 573				inspect
 574					v
 575				when 0 .. 127 then -- single-byte value: added as is
 576					extend(v)
 577					k := 0
 578				when 192 .. 223 then -- lead byte of a 2-byte sequence
 579					v := v - 192
 580					k := 2
 581				when 224 .. 239 then -- lead byte of a 3-byte sequence
 582					v := v - 224
 583					k := 3
 584				when 240 .. 247 then -- lead byte of a 4-byte sequence
 585					v := v - 240
 586					k := 4
 587				else -- any other byte cannot start a sequence
 588					extend(65533)
 589					Result := False
 590					k := 0
 591				end
 592				from
 593					seq_length := k -- remember the announced length for the overlong test below
 594				until
 595					k <= 1
 596				loop
 597					if i <= s.count and then s.item(i).code.in_range(128, 191) then
 598						v := v * 64 + s.item(i).code - 128 -- append the six payload bits of the continuation byte
 599						i := i + 1
 600						k := k - 1
 601					else -- missing or malformed continuation byte
 602						extend(65533)
 603						Result := False
 604						k := 0
 605					end
 606				end
 607				if k = 1 then -- `k' ends at 1 only when every continuation byte was read
 608					if v < 128 or else v < 2048 and then seq_length > 2 or else v < 65536 and then seq_length > 3 then
 609						-- overlong sequence, must be refused by any UTF-8
 610						-- compliant decoder for security reasons.
 611						extend(65533)
 612						Result := False
 613					elseif not valid_unicode(v) then
 614						extend(65533)
 615						Result := False
 616					else
 617						extend(v)
 618					end
 619				end
 620			end
 621		end
 622
 623feature {}
 624	from_utf8 (s: STRING) is
 625			-- Use `s' as UTF-8 format encoded unicode string.
 626			-- This function may be used for manifest strings.
 627			-- The outcome of decoding is not reported; see `utf8_decode_from' for error detection.
 628		require
 629			s /= Void
 630		local
 631			error: BOOLEAN
 632		do
 633			make(s.count)
 634			error := utf8_decode_from(s) -- result is stored but never read; note that `utf8_decode_from' returns False on failure
 635		end
 636
 637feature {ANY} -- Modification:
 638	resize (new_count: INTEGER) is
 639			-- Resize Current. When `new_count' is greater than
 640			-- `count', new positions are initialized with unicode 0.
 641		require
 642			new_count >= 0
 643		local
 644			i: INTEGER
 645		do
 646			if new_count <= count then
 647			elseif capacity < new_count then
 648				if capacity = 0 then
 649					storage := storage.calloc(new_count)
 650				else
 651					storage := storage.realloc(capacity, new_count)
 652				end
 653				capacity := new_count
 654			else
 655				storage.clear(count, new_count - 1)
 656			end
 657			count := new_count
 658			from
 659				i := low_surrogate_indexes.upper
 660			until
 661				i < 0 or else low_surrogate_indexes.item(i) <= new_count
 662			loop
 663				--TODO: only one remove out of the loop
 664				low_surrogate_indexes.remove_last
 665				low_surrogate_values.remove_last
 666				i := i - 1
 667			end
 668		ensure
 669			count = new_count
 670			capacity >= old capacity
 671		end
 672
 673	clear_count, wipe_out is
 674			-- Discard all characters so that `is_empty' is True after that call.
 675			--	The internal `capacity' is not changed by this call (i.e. the internal `storage' memory is
 676			--	neither released nor shrunk).
 677			--
 678			-- See also `clear_count_and_capacity'.
 679		do
 680			count := 0
 681			low_surrogate_indexes.make(0)
 682			low_surrogate_values.make(0)
 683		ensure
 684			is_empty: count = 0
 685			capacity = old capacity
 686		end
 687
 688	clear_count_and_capacity is
 689			-- Discard all characters (`is_empty' is True after that call). The internal `capacity' may also be
 690			-- reduced after this call.
 691			--
 692			-- See also `clear_count'.
 693		do
 694			low_surrogate_indexes.clear_count_and_capacity
 695			low_surrogate_values.clear_count_and_capacity
 696			clear_count
 697			--*** capacity := 0
 698			--*** storage := null_storage
 699		ensure
 700			is_empty: count = 0
 701			capacity = 0 -- NOTE(review): the assignment to `capacity' above is commented out, so nothing in this body sets it to 0 — confirm
 702		end
 703
 704	copy (other: like Current) is
 705			-- Copy `other' onto Current.
 706			--
 707			-- See also `copy_substring'.
 708		do
 709			count := other.count
 710			if count > 0 then
 711				if capacity < count then
 712					storage := storage.calloc(count)
 713					capacity := count
 714				end
 715				storage.copy_from(other.storage, count - 1)
 716			end
 717			if low_surrogate_indexes = Void then
 718				create low_surrogate_indexes.make(0)
 719				create low_surrogate_values.make(0)
 720			end
 721			low_surrogate_indexes.copy(other.low_surrogate_indexes)
 722			low_surrogate_values.copy(other.low_surrogate_values)
 723		ensure then
 724			count = other.count
 725		end
 726
 727	copy_substring (s: like Current; start_index, end_index: INTEGER) is
 728			-- Copy the substring from `s' from `start_index' to `end_index'
 729			-- to Current.
 730			--
 731			-- See also `copy'.
 732			--|*** DUMB IMPLEMENTATION
 733		require
 734			string_not_void: s /= Void
 735			valid_start_index: 1 <= start_index
 736			valid_end_index: end_index <= s.count
 737			meaningful_interval: start_index <= end_index + 1
 738		do
 739			clear_count
 740			append_substring(s, start_index, end_index)
 741		end
 742
 743	fill_with (unicode: INTEGER) is
 744			-- Replace every unicode with the new value.
 745		require
 746			valid_unicode_value: valid_unicode(unicode)
 747		local
 748			i: INTEGER; code: INTEGER_16; remainder: INTEGER_16
 749		do
 750			if unicode >= 65536 then
 751				-- stored as high and low surrogate
 752				code := (unicode #// 1024 - 64).low_16
 753				remainder := (unicode & 0x000003FF).to_integer_16 --unicode #\\ 1024
 754				storage.set_all_with(code, count - 1)
 755				low_surrogate_values.resize(count)
 756				low_surrogate_values.set_all_with(remainder)
 757				from
 758					i := count - 1
 759					low_surrogate_indexes.resize(count)
 760				until
 761					i < 0
 762				loop
 763					low_surrogate_indexes.put(i + 1, i)
 764					i := i - 1
 765				end
 766			else
 767				code := unicode.low_16
 768				storage.set_all_with(code, count - 1)
 769				low_surrogate_values.resize(0)
 770				low_surrogate_indexes.resize(0)
 771			end
 772		ensure
 773			occurrences(unicode) = count
 774		end
 775
 776	replace_all (old_code, new_code: like item) is
 777			-- Replace all occurrences of the element `old_code' by `new_code'.
 778		require
 779			valid_unicode_value: valid_unicode(old_code)
 780			valid_unicode_value: valid_unicode(new_code)
 781		local
 782			i: INTEGER
 783		do
 784			--*** May be implemented in a more efficient way...
 785			if old_code /= new_code then
 786				from
 787					i := index_of(old_code, 1)
 788				until
 789					i = 0
 790				loop
 791					put(new_code, i)
 792					i := index_of(old_code, i + 1)
 793				end
 794			end
 795		ensure
 796			count = old count
 797			old_code /= new_code implies occurrences(old_code) = 0
 798		end
 799
 800	append, append_string (s: UNICODE_STRING) is
 801			-- Append a copy of 's' to `Current'.
 802			--
 803			-- See also `add_last', `add_first', `prepend', '+'.
 804		require
 805			s_not_void: s /= Void
 806		local
 807			s_count, needed_capacity, new_capacity, i: INTEGER; indexes: FAST_ARRAY[INTEGER]
 808		do
 809			s_count := s.count
 810			needed_capacity := count + s_count
 811			if needed_capacity > capacity then
 812				if capacity = 0 then
 813					storage := storage.calloc(needed_capacity)
 814					capacity := needed_capacity
 815				else
 816					new_capacity := (2 * capacity).max(needed_capacity)
 817					storage := storage.realloc(capacity, new_capacity)
 818					capacity := new_capacity
 819				end
 820			end
 821			storage.copy_at(count, s.storage, s_count)
 822			from
 823				indexes := s.low_surrogate_indexes
 824			until
 825				i > indexes.upper
 826			loop
 827				low_surrogate_indexes.add_last(indexes.item(i) + count)
 828				low_surrogate_values.add_last(s.low_surrogate_values.item(i))
 829				i := i + 1
 830			end
 831			count := needed_capacity
 832		end
 833
 834	append_substring (s: like Current; start_index, end_index: INTEGER) is
 835			-- Append the substring from `s' from `start_index' to `end_index'
 836			-- to Current.
 837			--|*** DUMB IMPLEMENTATION
 838		require
 839			string_not_void: s /= Void
 840			valid_start_index: 1 <= start_index
 841			valid_end_index: end_index <= s.count
 842			meaningful_interval: start_index <= end_index + 1
 843		local
 844			i: INTEGER
 845		do
 846			from
 847				i := start_index
 848			until
 849				i > end_index
 850			loop
 851				extend(s.item(i))
 852				i := i + 1
 853			end
 854		end
 855
 856	prepend (other: UNICODE_STRING) is
 857			-- Prepend `other' to `Current'.
 858			--
 859			-- See also `append'.
 860		require
 861			other /= Void
 862		local
 863			i, j, k: INTEGER
 864		do
 865			i := count
 866			j := other.count
 867			resize(i + j)
 868			if i > 0 and then j > 0 then
 869				storage.move(0, i - 1, j)
 870				from
 871					k := low_surrogate_indexes.upper
 872				until
 873					k < 0
 874				loop
 875					low_surrogate_indexes.put(low_surrogate_indexes.item(k) + j, k)
 876					k := k - 1
 877				end
 878			end
 879			-- May be implemented in a more efficient way...
 880			from
 881				k := other.low_surrogate_indexes.upper
 882			until
 883				k < 0
 884			loop
 885				low_surrogate_indexes.add_first(other.low_surrogate_indexes.item(k))
 886				low_surrogate_values.add_first(other.low_surrogate_values.item(k))
 887				k := k - 1
 888			end
 889			storage.copy_from(other.storage, j - 1)
 890		ensure
 891			(old other.twin + old Current.twin).is_equal(Current)
 892		end
 893
 894	insert_string (s: UNICODE_STRING; i: INTEGER) is
 895			-- Insert `s' at index `i', shifting characters from index `i'
 896			-- to `count' rightwards.
 897		require
 898			string_not_void: s /= Void
 899			valid_insertion_index: 1 <= i and i <= count + 1
 900		local
 901			j, k: INTEGER; pos, n: INTEGER
 902		do
 903			j := count
 904			k := s.count
 905			resize(j + k)
 906			if i <= j then
 907				storage.move(i - 1, j - 1, k)
 908			end
 909			storage.copy_at(i - 1, s.storage, k)
 910			pos := low_surrogate_position(i)
 911			j := low_surrogate_indexes.count + s.low_surrogate_indexes.count
 912			low_surrogate_indexes.resize(j)
 913			low_surrogate_values.resize(j)
 914			from
 915				-- move existing surrogates and adjust indexes
 916				n := s.low_surrogate_indexes.upper
 917			until
 918				n < 0
 919			loop
 920				j := j - 1
 921				low_surrogate_indexes.put(low_surrogate_indexes.item(pos + n) + k, j)
 922				low_surrogate_values.put(low_surrogate_values.item(pos + n), j)
 923				n := n - 1
 924			end
 925			from
 926				-- copy surrogates from s and adjust indexes
 927				n := s.low_surrogate_indexes.upper
 928				j := pos + n
 929			until
 930				n < 0
 931			loop
 932				low_surrogate_indexes.put(s.low_surrogate_indexes.item(n) + i, j)
 933				low_surrogate_values.put(s.low_surrogate_values.item(n), j)
 934				j := j - 1
 935				n := n - 1
 936			end
 937		end
 938
 939	replace_substring (s: UNICODE_STRING; start_index, end_index: INTEGER) is
 940			-- Replace the substring from `start_index' to `end_index',
 941			-- inclusive, with `s'.
 942		require
 943			string_not_void: s /= Void
 944			valid_start_index: 1 <= start_index
 945			valid_end_index: end_index <= count
 946			meaningful_interval: start_index <= end_index + 1
 947		do
 948			-- May be implemented in a more efficient way...
 949			remove_between(start_index, end_index)
 950			insert_string(s, start_index)
 951		end
 952
 953	infix "+" (other: UNICODE_STRING): like Current is
 954			-- Create a new UNICODE_STRING which is the concatenation of
 955			-- `Current' and `other'.
 956			--
 957			-- See also `append'.
 958		require
 959			other_exists: other /= Void
 960		do
 961			create Result.make(count + other.count)
 962			Result.append(Current)
 963			Result.append(other)
 964		ensure
 965			result_count: Result.count = count + other.count
 966		end
 967
 968	put (unicode: INTEGER; i: INTEGER) is
 969			-- Put `unicode' at position `i'.
 970			--
 971			-- See also `item', `lower', `upper', `swap'.
 972		require
 973			valid_index: valid_index(i)
 974			valid_unicode_value: valid_unicode(unicode)
 975		local
 976			v, n: INTEGER
 977		do
 978			if unicode >= 65536 then
 979				-- stored as high and low surrogate
 980				v := unicode #// 1024 - 64
 981				if storage.item(i - 1) & 0xF800 = 0xD800 then
 982					low_surrogate_values.put((unicode & 0x000003FF).to_integer_16, low_surrogate_index(i))
 983				else
 984					n := low_surrogate_position(i)
 985					low_surrogate_indexes.add_last(0)
 986					low_surrogate_values.add_last(0)
 987					if n /= low_surrogate_indexes.upper then
 988						low_surrogate_indexes.move(n, low_surrogate_indexes.upper - 1, 1)
 989						low_surrogate_values.move(n, low_surrogate_values.upper - 1, 1)
 990					end
 991					low_surrogate_indexes.put(i, n)
 992					low_surrogate_values.put((unicode & 0x000003FF).to_integer_16, n)
 993				end
 994				storage.put(v.low_16, i - 1)
 995			else
 996				if storage.item(i - 1) & 0xF800 = 0xD800 then
 997					v := low_surrogate_index(i)
 998					low_surrogate_indexes.remove(v)
 999					low_surrogate_values.remove(v)
1000				end
1001				storage.put(unicode.low_16, i - 1)
1002			end
1003		ensure
1004			item(i) = unicode
1005		end
1006
1007	swap (i1, i2: INTEGER) is
1008			-- Swap the characters at positions `i1' and `i2', keeping the
1009			-- low-surrogate tables in step with the stored units.
1010			-- See also `item', `put'.
1011		require
1012			valid_index(i1)
1013			valid_index(i2)
1014		local
1015			tmp: INTEGER_16; j1, j2: INTEGER; low_tmp: INTEGER_16; k1, k2: INTEGER
1016		do
1017			j1 := i1 - 1
1018			j2 := i2 - 1
1019			tmp := storage.item(j1)
1020			if tmp & 0xF800 = 0xD800 then
1021				if storage.item(j2) & 0xF800 = 0xD800 then
1022					k1 := low_surrogate_index(i1)
1023					k2 := low_surrogate_index(i2)
1024					low_tmp := low_surrogate_values.item(k1)
1025					low_surrogate_values.put(low_surrogate_values.item(k2), k1)
1026					low_surrogate_values.put(low_tmp, k2)
1027					low_surrogate_indexes.put(i2, k1)
1028					low_surrogate_indexes.put(i1, k2)
1029				else
1030					k1 := low_surrogate_index(i1)
1031					low_tmp := low_surrogate_values.item(k1) -- read only once `k1' is known, and before the moves overwrite that slot
1032					k2 := low_surrogate_position(i2)
1033					if k2 > k1 + 1 then
1034						low_surrogate_indexes.move(k1 + 1, k2 - 1, -1)
1035						low_surrogate_values.move(k1 + 1, k2 - 1, -1)
1036						k2 := k2 - 1
1037					elseif k1 > k2 then
1038						low_surrogate_indexes.move(k2, k1 - 1, 1)
1039						low_surrogate_values.move(k2, k1 - 1, 1)
1040						--else no move
1041					end
1042					low_surrogate_indexes.put(i1, k2) -- NOTE(review): the surrogate unit now sits at `i2' — confirm that `i1' is the intended index
1043					low_surrogate_values.put(low_tmp, k2)
1044				end
1045			else
1046				if storage.item(j2) & 0xF800 = 0xD800 then
1047					k1 := low_surrogate_position(i1)
1048					k2 := low_surrogate_index(i2)
1049					low_tmp := low_surrogate_values.item(k2) -- read only once `k2' is known, and before the moves overwrite that slot
1050					if k1 > k2 + 1 then
1051						low_surrogate_indexes.move(k2 + 1, k1 - 1, -1)
1052						low_surrogate_values.move(k2 + 1, k1 - 1, -1)
1053						k1 := k1 - 1
1054					elseif k2 > k1 then
1055						low_surrogate_indexes.move(k1, k2 - 1, 1)
1056						low_surrogate_values.move(k1, k2 - 1, 1)
1057						--else no move
1058					end
1059					low_surrogate_indexes.put(i2, k1) -- NOTE(review): the surrogate unit now sits at `i1' — confirm that `i2' is the intended index
1060					low_surrogate_values.put(low_tmp, k1)
1061					-- else i1 and i2 are not surrogate
1062				end
1063			end
1064			storage.put(storage.item(j2), j1)
1065			storage.put(tmp, j2)
1066		ensure
1067			item(i1) = old item(i2)
1068			item(i2) = old item(i1)
1069		end
1070
1071	insert_character (unicode: INTEGER; i: INTEGER) is
1072			-- Inserts `unicode' at index `i', shifting characters from
1073			-- position 'i' to `count' rightwards.
1074		require
1075			valid_insertion_index: 1 <= i and i <= count + 1
1076			valid_unicode_value: valid_unicode(unicode)
1077		local
1078			j, k: INTEGER
1079		do
1080			k := low_surrogate_position(i)
1081			from
1082				j := low_surrogate_indexes.upper
1083			until
1084				j < k
1085			loop
1086				low_surrogate_indexes.put(low_surrogate_indexes.item(j) + 1, j)
1087				j := j - 1
1088			end
1089			resize(count + 1)
1090			if count > 1 then
1091				storage.move(i - 1, count - 2, 1)
1092				storage.put(0, i - 1)
1093			end
1094			put(unicode, i)
1095		ensure
1096			item(i) = unicode
1097		end
1098
1099	shrink (min_index, max_index: INTEGER) is
1100			-- Keep only the slice [`min_index' .. `max_index'] or nothing
1101			-- when the slice is empty.
1102		require
1103			1 <= min_index
1104			max_index <= count
1105			min_index <= max_index + 1
1106		local
1107			i, j: INTEGER
1108		do
1109			if max_index < min_index then
1110				count := 0
1111				low_surrogate_indexes.make(0)
1112				low_surrogate_values.make(0)
1113			elseif min_index = 1 then
1114				count := max_index
1115				i := low_surrogate_position(count)
1116				if i <= low_surrogate_indexes.upper then
1117					if low_surrogate_indexes.item(i) = max_index then
1118						i := i + 1
1119					end
1120				end
1121				low_surrogate_indexes.resize(i)
1122				low_surrogate_values.resize(i)
1123			else
1124				storage.slice_copy(0, storage, min_index - 1, max_index - 1)
1125				from
1126					i := low_surrogate_position(min_index)
1127				until
1128					i > low_surrogate_indexes.upper or else low_surrogate_indexes.item(i) > max_index
1129				loop
1130					low_surrogate_indexes.put(low_surrogate_indexes.item(i) - min_index + 1, j)
1131					low_surrogate_values.put(low_surrogate_values.item(i), j)
1132					j := j + 1
1133					i := i + 1
1134				end
1135				low_surrogate_indexes.resize(j)
1136				low_surrogate_values.resize(j)
1137				count := max_index - min_index + 1
1138			end
1139		ensure
1140			count = max_index - min_index + 1
1141		end
1142
1143	remove (i: INTEGER) is
1144			-- Remove character at position `i'.
1145			--
1146			-- See also `remove_head', `remove_between', `remove_suffix', `remove_prefix'.
1147		require
1148			valid_removal_index: valid_index(i)
1149		do
1150			remove_between(i, i)
1151		ensure
1152			count = old count - 1
1153		end
1154
1155	add_first, precede (unicode: INTEGER) is
1156			-- Add `unicode' at first position.
1157			--
1158			-- See also `add_last'.
1159		require
1160			valid_unicode_value: valid_unicode(unicode)
1161		local
1162			i: INTEGER
1163		do
1164			from
1165				i := low_surrogate_indexes.upper
1166			until
1167				i < 0
1168			loop
1169				low_surrogate_indexes.put(low_surrogate_indexes.item(i) + 1, i)
1170				i := i - 1
1171			end
1172			resize(count + 1)
1173			if count > 1 then
1174				storage.move(0, count - 2, 1)
1175				storage.put(0, 0)
1176			end
1177			put(unicode, 1)
1178		ensure
1179			count = 1 + old count
1180			item(1) = unicode
1181		end
1182
1183	add_last, append_character, extend (unicode: INTEGER) is
1184			-- Append `unicode' to string.
1185			--
1186			-- See also `add_first'.
1187		require
1188			valid_unicode_value: valid_unicode(unicode)
1189		local
1190			new_capacity: INTEGER
1191		do
1192			if capacity > count then
1193			elseif capacity = 0 then
1194				new_capacity := 32
1195				storage := storage.calloc(new_capacity)
1196				capacity := new_capacity
1197			else
1198				new_capacity := 2 * capacity
1199				storage := storage.realloc(capacity, new_capacity)
1200				capacity := new_capacity
1201			end
1202			if unicode >= 65536 then
1203				-- stored as high and low surrogate
1204				low_surrogate_indexes.add_last(count)
1205				low_surrogate_values.add_last((unicode & 0x000003FF).to_integer_16)
1206				storage.put((unicode #// 1024 - 64).low_16, count)
1207			else
1208				storage.put(unicode.low_16, count)
1209			end
1210			count := count + 1
1211		ensure
1212			count = 1 + old count
1213			item(count) = unicode
1214		end
1215
1216	to_lower is
1217			-- Convert all characters to lower case.
1218			--
1219			-- See also `to_upper', `as_lower', `as_upper'.
1220		do
1221			not_yet_implemented
1222		end
1223
1224	to_upper is
1225			-- Convert all characters to upper case.
1226			--
1227			-- See also `to_lower', `as_upper', `as_lower'.
1228		do
1229			not_yet_implemented
1230		end
1231
1232	as_lower: like Current is
1233			-- New object with all letters in lower case.
1234			--
1235			-- See also `as_upper', `to_lower', `to_upper'.
1236		do
1237			create Result.copy(Current)
1238			Result.to_lower
1239		end
1240
1241	as_upper: like Current is
1242			-- New object with all letters in upper case.
1243			--
1244			-- See also `as_lower', `to_upper', `to_lower'.
1245		do
1246			create Result.copy(Current)
1247			Result.to_upper
1248		end
1249
1250	keep_head (n: INTEGER) is
1251			-- Remove all characters except for the first `n'.
1252			-- Do nothing if `n' >= `count'.
1253			--
1254			-- See also `keep_tail', `remove_head', `remove_tail'.
1255		require
1256			n_non_negative: n >= 0
1257		do
1258			if n < count then
1259				remove_tail(count - n)
1260			end
1261		ensure
1262			count = n.min(old count)
1263		end
1264
1265	keep_tail (n: INTEGER) is
1266			-- Remove all characters except for the last `n'.
1267			-- Do nothing if `n' >= `count'.
1268			--
1269			-- See also `keep_head', `remove_tail', `remove_head'.
1270		require
1271			n_non_negative: n >= 0
1272		do
1273			if n < count then
1274				remove_head(count - n)
1275			end
1276		ensure
1277			count = n.min(old count)
1278		end
1279
1280	remove_first is
1281			-- Remove the `first' item.
1282			--
1283			-- See also `remove_head', `remove_last', `remove'.
1284		require
1285			not is_empty
1286		do
1287			--*** May be improved?
1288			remove_between(1, 1)
1289		ensure
1290			count = old count - 1
1291		end
1292
1293	remove_head (n: INTEGER) is
1294			-- Remove `n' first characters. If `n' >= `count', remove all.
1295			--
1296			-- See also `remove_tail', `remove', `remove_the_first'.
1297		require
1298			n_non_negative: n >= 0
1299		do
1300			if n > count then
1301				count := 0
1302				low_surrogate_indexes.make(0)
1303				low_surrogate_values.make(0)
1304			else
1305				if n > 0 then
1306					remove_between(1, n)
1307				end
1308			end
1309		ensure
1310			count = (old count - n).max(0)
1311		end
1312
1313	remove_last is
1314			-- Remove the `last' item.
1315			--
1316			-- See also `remove_tail', `remove_first', `remove'.
1317		require
1318			not is_empty
1319		do
1320			--*** May be improved
1321			remove_tail(1)
1322		ensure
1323			count = old count - 1
1324		end
1325
1326	remove_tail (n: INTEGER) is
1327			-- Remove `n' last characters. If `n' >= `count', remove all.
1328			--
1329			-- See also `remove_head', `remove', `remove_the_last'.
1330		require
1331			n_non_negative: n >= 0
1332		local
1333			i: INTEGER
1334		do
1335			if n > count then
1336				count := 0
1337				low_surrogate_indexes.make(0)
1338				low_surrogate_values.make(0)
1339			else
1340				count := count - n
1341				i := low_surrogate_position(count + 1)
1342				low_surrogate_indexes.resize(i)
1343				low_surrogate_values.resize(i)
1344			end
1345		ensure
1346			count = (old count - n).max(0)
1347		end
1348
1349	remove_substring, remove_between (start_index, end_index: INTEGER) is
1350			-- Remove all characters from `strt_index' to `end_index' inclusive.
1351		require
1352			valid_start_index: 1 <= start_index
1353			valid_end_index: end_index <= count
1354			meaningful_interval: start_index <= end_index + 1
1355		local
1356			i, k, len: INTEGER
1357		do
1358			len := end_index - start_index + 1
1359			if len > 0 then
1360				from
1361					i := low_surrogate_position(start_index)
1362					k := low_surrogate_position(end_index + 1)
1363				until
1364					k > low_surrogate_indexes.upper
1365				loop
1366					low_surrogate_indexes.put(low_surrogate_indexes.item(k) - len, i)
1367					low_surrogate_values.put(low_surrogate_values.item(k), i)
1368					k := k + 1
1369					i := i + 1
1370				end
1371				low_surrogate_indexes.resize(i)
1372				low_surrogate_values.resize(i)
1373				storage.slice_copy(start_index - 1, storage, end_index, count - 1)
1374				count := count - len
1375			end
1376		ensure
1377			count = old count - (end_index - start_index + 1)
1378		end
1379
1380	remove_suffix (s: UNICODE_STRING) is
1381			-- Remove the suffix `s' of current string.
1382			--
1383			-- See also `remove_prefix', `remove_tail', `remove'.
1384		require
1385			has_suffix(s)
1386		do
1387			not_yet_implemented
1388			-- remove_last(s.count); equal sequence may have different size
1389		ensure
1390			(old Current.twin).is_equal(Current + old s.twin)
1391		end
1392
1393	remove_prefix (s: UNICODE_STRING) is
1394			-- Remove the prefix `s' of current string.
1395			--
1396			-- See also `remove_suffix', `remove_head', `remove'.
1397		require
1398			has_prefix(s)
1399		do
1400			not_yet_implemented
1401			-- remove_head(s.count); equal sequence may have different size
1402		ensure
1403			(old Current.twin).is_equal(old s.twin + Current)
1404		end
1405
1406	left_adjust is
1407			-- Remove leading blanks.
1408			--
1409			-- See also `remove_head', `first'.
1410		local
1411			i: INTEGER
1412		do
1413			from
1414				i := 1
1415			until
1416				i > count or else not is_space(item(i -- not_yet_implemented -- handle combining characters
1417				))
1418			loop
1419				i := i + 1
1420			end
1421			remove_head(i - 1)
1422		ensure
1423		-- not_yet_implemented -- handle combining characters
1424			stripped: is_empty or else not is_space(first)
1425		end
1426
1427	right_adjust is
1428			-- Remove trailing blanks.
1429			--
1430			-- See also `remove_tail', `last'.
1431		local
1432			i: INTEGER
1433		do
1434			from
1435			until
1436				count = 0 or else not is_space(item(count -- not_yet_implemented -- handle combining characters
1437				))
1438			loop
1439				count := count - 1
1440			end
1441			i := low_surrogate_position(count + 1)
1442			low_surrogate_indexes.resize(i)
1443			low_surrogate_values.resize(i)
1444		ensure
1445		-- not_yet_implemented -- handle combining characters
1446			stripped: is_empty or else not is_space(last)
1447		end
1448
1449feature {ANY} -- Printing:
1450	out_in_tagged_out_memory is
1451		do
1452			utf8_encode_in(tagged_out_memory)
1453		end
1454
1455	fill_tagged_out_memory is
1456		do
1457			tagged_out_memory.append(once "count: ")
1458			count.append_in(tagged_out_memory)
1459			tagged_out_memory.append(once "capacity: ")
1460			capacity.append_in(tagged_out_memory)
1461			tagged_out_memory.append(once "storage: %"")
1462			utf8_encode_in(tagged_out_memory)
1463			tagged_out_memory.append_character('%"')
1464		end
1465
1466feature {ANY} -- Other features:
1467	first: UNICODE_CHARACTER is
1468			-- The first character.
1469			--
1470			-- See also `last', `item'.
1471		do
1472			Result.set(g_utf8_get_char(handle))
1473		end
1474
1475	last: INTEGER is
1476			-- The last character.
1477			--
1478			-- See also `first', `item'.
1479		do
1480		end
1481
1482	substring (a_start_index, an_end_index: like lower): like Current is
1483			-- New string consisting of items [`start_index'.. `end_index'].
1484			--
1485			-- See also `substring_index' and `copy_substring' to save memory.
1486		require
1487			valid_start_index: 1 <= start_index
1488			valid_end_index: end_index <= count
1489			meaningful_interval: start_index <= end_index + 1
1490		local
1491			location, end_point: POINTER
1492		do
1493			create Result.with_capacity(end_index - start_index + 1)
1494			from location:=g_utf8_offset_to_pointer(handle,a_start_index.to_integer_32)
1495			until
1496			loop
1497			end
1498		ensure
1499			substring_count: Result.count = end_index - start_index + 1
1500		end
1501
1502	extend_multiple (unicode: INTEGER; n: INTEGER) is
1503			-- Extend Current with `n' times character `unicode'.
1504		require
1505			n >= 0
1506			valid_unicode_value: valid_unicode(unicode)
1507		local
1508			i: INTEGER
1509		do
1510			from
1511				i := n
1512			until
1513				i = 0
1514			loop
1515				append_character(unicode)
1516				i := i - 1
1517			end
1518		ensure
1519			count = n + old count
1520		end
1521
1522	precede_multiple (unicode: INTEGER; n: INTEGER) is
1523			-- Prepend `n' times character `unicode' to Current.
1524		require
1525			n >= 0
1526			valid_unicode_value: valid_unicode(unicode)
1527		local
1528			i: INTEGER
1529		do
1530			if n > 0 then
1531				if count = 0 then
1532					extend_multiple(unicode, n)
1533				else
1534					--|*** May be implemented in a more efficient way...
1535					from
1536						i := n
1537					until
1538						i = 0
1539					loop
1540						precede(unicode)
1541						i := i - 1
1542					end
1543				end
1544			end
1545		ensure
1546			count = n + old count
1547		end
1548
1549	extend_to_count (unicode: INTEGER; needed_count: INTEGER) is
1550			-- Extend Current with `unicode' until `needed_count' is reached.
1551			-- Do nothing if `needed_count' is already greater or equal
1552			-- to `count'.
1553		require
1554			needed_count >= 0
1555			valid_unicode_value: valid_unicode(unicode)
1556		do
1557			if needed_count > count then
1558				extend_multiple(unicode, needed_count - count)
1559			end
1560		ensure
1561			count >= needed_count
1562		end
1563
1564	precede_to_count (unicode: INTEGER; needed_count: INTEGER) is
1565			-- Prepend `unicode' to Current until `needed_count' is reached.
1566			-- Do nothing if `needed_count' is already greater or equal
1567			-- to `count'.
1568		require
1569			needed_count >= 0
1570			valid_unicode_value: valid_unicode(unicode)
1571		do
1572			if needed_count > count then
1573				precede_multiple(unicode, needed_count - count)
1574			end
1575		ensure
1576			count >= needed_count
1577		end
1578
1579	reverse is
1580			-- Reverse the string.
1581		local
1582			i1, i2: INTEGER
1583		do
1584			not_yet_implemented
1585			--|*** reverse grapheme
1586			from
1587				i1 := 1
1588				i2 := count
1589			until
1590				i1 >= i2
1591			loop
1592				swap(i1, i2)
1593				i1 := i1 + 1
1594				i2 := i2 - 1
1595			end
1596		end
1597
1598	remove_all_occurrences (unicode: INTEGER) is
1599			-- Remove all occurrences of `unicode'.
1600			--
1601			-- See also `occurrences', `remove'.
1602		require
1603			valid_unicode_value: valid_unicode(unicode)
1604		local
1605			i: INTEGER
1606		do
1607			--|*** May be implemented in a more efficient way...
1608			from
1609				i := index_of(unicode, 1)
1610			until
1611				i = 0
1612			loop
1613				remove(i)
1614				i := index_of(unicode, i)
1615			end
1616		ensure
1617			count = old count - old occurrences(unicode)
1618		end
1619
1620	substring_index (other: UNICODE_STRING; start_index: INTEGER): INTEGER is
1621			-- Position of first occurrence of `other' at or after `start', 0 if none.
1622			--
1623			-- See also `substring', `first_substring_index'.
1624		require
1625			other_not_void: other /= Void
1626			valid_start_index: start_index >= 1 and start_index <= count + 1
1627		do
1628			not_yet_implemented
1629		end
1630
1631	first_substring_index (other: UNICODE_STRING): INTEGER is
1632			-- Position of first occurrence of `other' at or after 1, 0 if none.
1633			--
1634			-- See also `substring_index'.
1635		require
1636			other_not_void: other /= Void
1637		do
1638			Result := substring_index(other, 1)
1639		ensure
1640			definition: Result = substring_index(other, 1)
1641		end
1642
1643feature {ANY} -- Splitting a STRING:
1644	split: ARRAY[UTF8_STRING] is
1645			-- Split the string into an array of words. Uses `is_separator'
1646			-- to find words. Gives Void or a non empty array.
1647			--
1648			-- See also `split_in'.
1649		do
1650			if count > 0 then
1651				split_buffer.clear_count
1652				split_in(split_buffer)
1653				if not split_buffer.is_empty then
1654					Result := split_buffer.twin
1655				end
1656			end
1657		ensure
1658			Result /= Void implies not Result.is_empty
1659		end
1660
1661	split_in (words: COLLECTION[UTF_STRING]) is
1662			-- Same jobs as `split' but result is appended in `words'.
1663			--
1664			-- See also `split'.
1665		require
1666			words /= Void
1667		do
1668		ensure
1669			words.count >= old words.count
1670		end
1671
1672	get_new_iterator: ITERATOR[INTEGER] is
1673		do
1674			create {ITERATOR_ON_UNICODE_STRING} Result.make(Current)
1675		end
1676
1677feature {} -- Implementation
1678	split_buffer: ARRAY[UTF8_STRING] is
1679		once
1680			create Result.with_capacity(4, 1)
1681		end
1682
1683feature {}
	-- TODO: UNICODE_STRING has "manifest_initialize (c: like capacity; s: like storage; ls_cap: INTEGER; lsv: NATIVE_ARRAY[INTEGER_16]; lsi: NATIVE_ARRAY[INTEGER])", a compiler hook called automatically when a manifest unicode string (i.e. U"foo") is used in Eiffel source code. Provide a UTF-8 equivalent here.
1685
1686--   g_utf8_next_char()
1687
1688--  #define     g_utf8_next_char(p)
1689
1690--    Skips to the next character in a UTF-8 string. The string must be valid; this
1691--    macro is as fast as possible, and has no error-checking. You would use this macro
1692--    to iterate over a string character by character. The macro returns the start of
1693--    the next UTF-8 character. Before using this macro, use g_utf8_validate() to
1694--    validate strings that may contain invalid UTF-8.
1695
1696--    p : Pointer to the start of a valid UTF-8 character.
1697
1698--    ---------------------------------------------------------------------------------
1699
1700--   g_utf8_get_char ()
1701
1702--  gunichar    g_utf8_get_char                 (const gchar *p);
1703
1704--    Converts a sequence of bytes encoded as UTF-8 to a Unicode character. If p does
1705--    not point to a valid UTF-8 encoded character, results are undefined. If you are
1706--    not sure that the bytes are complete valid Unicode characters, you should use
1707--    g_utf8_get_char_validated() instead.
1708
1709--    p :       a pointer to Unicode character encoded as UTF-8
1710--    Returns : the resulting character
1711
1712--    ---------------------------------------------------------------------------------
1713
1714--   g_utf8_get_char_validated ()
1715
1716--  gunichar    g_utf8_get_char_validated       (const gchar *p,
1717--                                               gssize max_len);
1718
1719--    Convert a sequence of bytes encoded as UTF-8 to a Unicode character. This
1720--    function checks for incomplete characters, for invalid characters such as
1721--    characters that are out of the range of Unicode, and for overlong encodings of
1722--    valid characters.
1723
1724--    p :       a pointer to Unicode character encoded as UTF-8
1725--    max_len : the maximum number of bytes to read, or -1, for no maximum.
1726--    Returns : the resulting character. If p points to a partial sequence at the end
1727--              of a string that could begin a valid character, returns (gunichar)-2;
1728--              otherwise, if p does not point to a valid UTF-8 encoded Unicode
1729--              character, returns (gunichar)-1.
1730
1731--    ---------------------------------------------------------------------------------
1732
1733--   g_utf8_offset_to_pointer ()
1734
1735--  gchar*      g_utf8_offset_to_pointer        (const gchar *str,
1736--                                               glong offset);
1737
1738--    Converts from an integer character offset to a pointer to a position within the
1739--    string.
1740
1741--    Since 2.10, this function allows to pass a negative offset to step backwards. It
1742--    is usually worth stepping backwards from the end instead of forwards if offset is
1743--    in the last fourth of the string, since moving forward is about 3 times faster
1744--    than moving backward.
1745
1746--    str :     a UTF-8 encoded string
1747--    offset :  a character offset within str
1748--    Returns : the resulting pointer
1749
1750--    ---------------------------------------------------------------------------------
1751
1752--   g_utf8_pointer_to_offset ()
1753
1754--  glong       g_utf8_pointer_to_offset        (const gchar *str,
1755--                                               const gchar *pos);
1756
--    Converts from a pointer to a position within a string to an integer character
--    offset.
1759
1760--    Since 2.10, this function allows pos to be before str, and returns a negative
1761--    offset in this case.
1762
1763--    str :     a UTF-8 encoded string
1764--    pos :     a pointer to a position within str
1765--    Returns : the resulting character offset
1766
1767--    ---------------------------------------------------------------------------------
1768
1769--   g_utf8_prev_char ()
1770
1771--  gchar*      g_utf8_prev_char                (const gchar *p);
1772
1773--    Finds the previous UTF-8 character in the string before p.
1774
1775--    p does not have to be at the beginning of a UTF-8 character. No check is made to
1776--    see if the character found is actually valid other than it starts with an
1777--    appropriate byte. If p might be the first character of the string, you must use
1778--    g_utf8_find_prev_char() instead.
1779
1780--    p :       a pointer to a position within a UTF-8 encoded string
1781--    Returns : a pointer to the found character.
1782
1783--    ---------------------------------------------------------------------------------
1784
1785--   g_utf8_find_next_char ()
1786
1787--  gchar*      g_utf8_find_next_char           (const gchar *p,
1788--                                               const gchar *end);
1789
1790--    Finds the start of the next UTF-8 character in the string after p.
1791
1792--    p does not have to be at the beginning of a UTF-8 character. No check is made to
1793--    see if the character found is actually valid other than it starts with an
1794--    appropriate byte.
1795
1796--    p :       a pointer to a position within a UTF-8 encoded string
--    end :     a pointer to the end of the string, or NULL to indicate that the string
--              is nul-terminated
1799--    Returns : a pointer to the found character or NULL
1800
1801--    ---------------------------------------------------------------------------------
1802
1803--   g_utf8_find_prev_char ()
1804
1805--  gchar*      g_utf8_find_prev_char           (const gchar *str,
1806--                                               const gchar *p);
1807
1808--    Given a position p with a UTF-8 encoded string str, find the start of the
1809--    previous UTF-8 character starting before p. Returns NULL if no UTF-8 characters
1810--    are present in str before p.
1811
1812--    p does not have to be at the beginning of a UTF-8 character. No check is made to
1813--    see if the character found is actually valid other than it starts with an
1814--    appropriate byte.
1815
1816--    str :     pointer to the beginning of a UTF-8 encoded string
1817--    p :       pointer to some position within str
1818--    Returns : a pointer to the found character or NULL.
1819
1820--    ---------------------------------------------------------------------------------
1821
1822--   g_utf8_strlen ()
1823
1824--  glong       g_utf8_strlen                   (const gchar *p,
1825--                                               gssize max);
1826
1827--    Returns the length of the string in characters.
1828
1829--    p :       pointer to the start of a UTF-8 encoded string.
1830--    max :     the maximum number of bytes to examine. If max is less than 0, then the
1831--              string is assumed to be nul-terminated. If max is 0, p will not be
1832--              examined and may be NULL.
1833--    Returns : the length of the string in characters
1834
1835--    ---------------------------------------------------------------------------------
1836
1837--   g_utf8_strncpy ()
1838
1839--  gchar*      g_utf8_strncpy                  (gchar *dest,
1840--                                               const gchar *src,
1841--                                               gsize n);
1842
1843--    Like the standard C strncpy() function, but copies a given number of characters
1844--    instead of a given number of bytes. The src string must be valid UTF-8 encoded
1845--    text. (Use g_utf8_validate() on all text before trying to use UTF-8 utility
1846--    functions with it.)
1847
1848--    dest :    buffer to fill with characters from src
1849--    src :     UTF-8…

Large files files are truncated, but you can click here to view the full file