PageRenderTime 66ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/src/wrappers/glib/partially-implemented/utf8_string.e

http://github.com/tybor/Liberty
Specman e | 2179 lines | 1329 code | 237 blank | 613 comment | 77 complexity | c247625d7eace6f35615ec29190bda03 MD5 | raw file
Possible License(s): GPL-3.0, LGPL-2.1, GPL-2.0
  1. indexing
  2. description: "Unicode UTF-8 string."
  3. copyright: "[
  4. Copyright (C) 2006 Paolo Redaelli, Glib team
  5. This library is free software; you can redistribute it and/or
  6. modify it under the terms of the GNU Lesser General Public License
  7. as published by the Free Software Foundation; either version 2.1 of
  8. the License, or (at your option) any later version.
  9. This library is distributed in the hope that it will be useful, but
  10. WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. Lesser General Public License for more details.
  13. You should have received a copy of the GNU Lesser General Public
  14. License along with this library; if not, write to the Free Software
  15. Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  16. 02110-1301 USA
  17. ]"
  18. class UTF8_STRING
  19. -- A string of Unicode characters encoded into UTF-8.
  20. -- This particular encoding trades access efficiency for space efficiency:
  21. -- it uses a variable amount of memory to store each character, from 8 to
  22. -- 32 bits, thus making traversal and random access costly; AFAIK random
  23. -- access is O(n), and traversal, while still an O(1) operation, is
  24. -- much more complex and costly than traversing a normal STRING.
  25. -- UNICODE_MANIPULATION
  26. -- A number of functions for dealing with Unicode characters and
  27. -- strings. There are analogues of the traditional C functions
  28. -- found in ctype.h character classification and case conversion
  29. -- functions, UTF-8 analogues of some string utility functions,
  30. -- functions to perform normalization, case conversion and
  31. -- collation on UTF-8 strings and finally functions to convert
  32. -- between the UTF-8, UTF-16 and UCS-4 encodings of Unicode.
  33. -- The implementations of the Unicode functions in GLib are based
  34. -- on the Unicode Character Data tables, which are available from
  35. -- www.unicode.org. GLib 2.8 supports Unicode 4.0, GLib 2.10
  36. -- supports Unicode 4.1, GLib 2.12 supports Unicode 5.0.
  37. inherit
  38. HASHABLE
  39. redefine copy
  40. end
  41. COMPARABLE
  42. redefine is_equal, copy, compare, three_way_comparison
  43. end
  44. TRAVERSABLE[UNICODE_CHARACTER]
  45. redefine is_equal, copy
  46. end
  47. RECYCLABLE
  48. redefine is_equal, copy
  49. end
  50. WRAPPER
  51. insert
  52. GUNICODE_EXTERNALS
  53. GUNICODE_MACROS
  54. GMEM_EXTERNALS
  55. creation {ANY}
  56. make, copy, make_empty, make_filled, from_utf8
  57. feature {ANY}
  58. capacity: NATURAL_32
  59. -- Size in bytes of the string's storage. `make' sets it to the size it
  -- allocates; `from_string' and `from_pointer' set it to `bytes_count'.
  60. count: NATURAL_32
  61. -- String length in characters, not in bytes; `from_string' and
  -- `from_pointer' obtain it from `g_utf8_strlen'.
  lower: NATURAL_32 is
        -- Minimum valid index. It is currently 0, mimicking C strings, but
        -- well-written client code shall not rely on the actual value of
        -- this feature.
     do
        Result := 0.to_natural_32
     end
  upper: NATURAL_32 is
        -- Maximum index: one less than `count'.
        -- NOTE(review): the result type is unsigned, so for an empty string
        -- (`count' = 0) the subtraction cannot yield -1 -- confirm what
        -- callers should expect in that case.
     do
        Result := count - 1.to_natural_32
     end
  71. feature {} --
  72. bytes_count: NATURAL_32
  73. -- Length of the string in bytes. It differs from `count' as soon as
  -- the string contains non-ASCII characters.
  74. feature {ANY} -- Creation / Modification:
  make (needed_capacity: NATURAL_32) is
        -- Initialize the string to be empty, with at least
        -- `needed_capacity' bytes of storage.
        --
        -- A buffer is allocated only when the current `capacity' is too
        -- small. Raises `No_more_memory' when the allocation fails; in
        -- that case `handle' and `capacity' are left untouched.
        -- NOTE(review): a previously held buffer is not released here,
        -- because `from_pointer' may have stored memory this object does
        -- not own -- confirm the intended ownership rules.
     local
        fresh_buffer: POINTER
     do
        if needed_capacity > 0.to_natural_32 and then capacity < needed_capacity then
           fresh_buffer := g_try_malloc(needed_capacity)
           if fresh_buffer.is_null then
              raise(No_more_memory)
           else
              -- Keep the buffer in `handle', the pointer read by every
              -- other feature of this class.
              handle := fresh_buffer
              capacity := needed_capacity
           end
        end
        count := 0.to_natural_32
        -- An empty string occupies no byte either.
        bytes_count := 0.to_natural_32
     ensure
        needed_capacity <= capacity
        empty_string: count = 0
     end
  make_empty is
        -- Make Current an empty string: delegates to `make' with a
        -- requested capacity of zero.
     do
        make(0)
     end
  make_filled (a_character: UNICODE_CHARACTER; n: NATURAL_32) is
        -- Make Current a string made of `n' copies of `a_character'.
        -- NOTE(review): `make (n)' reserves `n' bytes, while a non-ASCII
        -- character takes more than one byte in UTF-8 -- confirm that
        -- `fill_with' copes with that.
     require
        a_character.is_valid
     do
        make(n)
        count := n
        fill_with(a_character)
     ensure
        count_set: count = n
        filled: occurrences(a_character) = count
     end
  from_string (a_string: STRING) is
        -- Make Current a copy of the UTF-8 encoded bytes of `a_string'.
        -- Raises `Non_valid_utf8_string' when `a_string' is not valid
        -- UTF-8.
     require
        a_string /= Void
     local
        source, an_end: POINTER
     do
        source := a_string.to_external
        if g_utf8_validate(source, a_string.count, $an_end).to_boolean then
           -- Copy one byte more than the payload so that the terminating
           -- NUL supplied by `to_external' comes along: `handle' is
           -- later handed to C functions that expect a zero-terminated
           -- string (`g_utf8_collate', `g_utf8_strchr' with length -1).
           handle := g_memdup(source, a_string.count + 1)
           -- The terminator is not counted as part of the content.
           bytes_count := a_string.count
           capacity := bytes_count
           count := g_utf8_strlen(handle, bytes_count)
        else
           raise(Non_valid_utf8_string)
        end
     end
  124. Non_valid_utf8_string: STRING is "Given string is not UTF8 valid."
  -- Exception message raised by `from_string' and `from_pointer' when the
  -- given bytes fail UTF-8 validation.
  125. feature {WRAPPER_HANDLER}
  from_pointer (a_pointer: POINTER) is
        -- Make Current wrap the zero-terminated string found at
        -- `a_pointer'. The bytes are not copied: `handle' is set to
        -- `a_pointer' itself. Raises `Non_valid_utf8_string' when the
        -- content is not valid UTF-8.
     local
        is_valid_utf8: BOOLEAN; an_end: POINTER
     do
        is_valid_utf8 := g_utf8_validate(a_pointer, -1, $an_end).to_boolean
        if not is_valid_utf8 then
           raise(Non_valid_utf8_string)
        else
           handle := a_pointer
           -- `an_end' was filled in by `g_utf8_validate'; its distance
           -- from the start is taken as the length in bytes.
           bytes_count := an_end - a_pointer
           capacity := bytes_count
           count := g_utf8_strlen(handle, bytes_count)
        end
     end
  140. feature {ANY} -- Testing:
  is_empty: BOOLEAN is
        -- True when the string holds no character at all, i.e. when
        -- `count' is 0.
        --
        -- See also `count'.
     do
        Result := (count = 0)
     end
  item (i: like lower): UNICODE_CHARACTER is
        -- The Unicode character at position `i'.
        --
        -- See also `lower', `upper', `valid_index', `put'.
     require
        valid_index(i)
     local
        location: POINTER
     do
        -- `g_utf8_offset_to_pointer' already yields the address of the
        -- `i'-th character; adding it to `handle' once more would point
        -- far outside the buffer.
        location := g_utf8_offset_to_pointer(handle, i)
        Result.set(g_utf8_get_char(location))
     end
  infix "@" (i: like lower): UNICODE_CHARACTER is
        -- The character at position `i'; operator synonym of `item'.
        --
        -- See also `item', `put'.
     require
        valid_index(i)
     do
        Result := item(i)
     ensure
        definition: Result = item(i)
     end
  hash_code: INTEGER is
        -- Hash value computed over every character: the running value is
        -- multiplied by 5 and the hash of the character code is added,
        -- using the `#*' and `#+' operators. A negative total is replaced
        -- by its bitwise complement so that the result is never negative.
     local
        cursor: ITERATOR_ON_UTF8_STRING
     do
        cursor := get_new_iterator
        from
           cursor.start
        until
           cursor.is_off
        loop
           Result := {INTEGER 5} #* Result #+ cursor.item.code.hash_code
           cursor.next
        end
        if Result < 0 then
           Result := ~Result
        end
     end
  infix "<" (other: like Current): BOOLEAN is
        -- Is `Current' less than `other'?
        -- Both strings are walked character by character up to the first
        -- difference, which decides the result. When one string runs out
        -- first, the one with fewer characters is the lesser.
        --
        -- See also `>', `<=', `>=', `min', `max'.
     local
        i, j: ITERATOR_ON_UTF8_STRING
     do
        from
           i := get_new_iterator; i.start
           j := other.get_new_iterator; j.start
        until
           (i.is_off or j.is_off) or else i.item /= j.item
        loop
           i.next; j.next
        end
        if i.is_off or j.is_off then
           Result := count < other.count
        else
           Result := i.item < j.item
        end
     end
  compare, three_way_comparison (other: like Current): INTEGER is
        -- Compare Current with `other' using the linguistically correct
        -- rules for the current locale. The result is < 0 if Current
        -- compares before `other', 0 if they compare equal and > 0 if
        -- Current compares after `other'.
        --
        -- When sorting a large number of strings it is significantly
        -- faster to obtain collation keys with `collate_key' and to
        -- compare those keys instead of the original strings. Note: in
        -- the C language collation keys are compared with strcmp; AFAIK
        -- comparing Eiffel STRINGs should be the same. Paolo 2009-06-22
     do
        Result := g_utf8_collate(handle, other.handle)
     end
  is_equal (other: like Current): BOOLEAN is
        -- Do both strings have the same character sequence?
        -- Strings with different `count' are never equal; otherwise the
        -- characters are compared pairwise until the first difference.
        --
        -- See also `same_as'.
     local
        i, j: ITERATOR_ON_UTF8_STRING
     do
        if Current = other then
           Result := True
        elseif count = other.count then
           -- TODO: Could be improved. If stored in canonical form
           -- direct memory comparison could be made. Paolo 2009-06-20
           from
              Result := True
              i := Current.get_new_iterator; i.start
              j := other.get_new_iterator; j.start
           until
              i.is_off or else not Result
           loop
              -- Same `count' on both sides, so `j' cannot run out first.
              check not i.is_off implies not j.is_off end
              Result := i.item = j.item
              i.next; j.next
           end
        else
           Result := False
        end
     end
  same_as (other: UNICODE_STRING): BOOLEAN is
        -- Case insensitive `is_equal': True when both strings have the
        -- same `count' and every pair of characters is equal once both
        -- are converted with `to_lower'.
     require
        other /= Void
     local
        mine, theirs: ITERATOR_ON_UTF8_STRING
     do
        Result := count = other.count
        if Result then
           from
              mine := Current.get_new_iterator; mine.start
              theirs := other.get_new_iterator; theirs.start
           until
              mine.is_off or else not Result
           loop
              Result := mine.item.to_lower = theirs.item.to_lower
              mine.next; theirs.next
           end
        end
     end
  index_of (a_character: like item; a_start_index: like lower): REFERENCE[like lower] is
        -- Index of the first occurrence of `a_character' at or after
        -- `a_start_index'; Void if not found.
        --
        -- `not_yet_implemented' is called first, so this feature is
        -- presumably unusable for now. The statements after it are an
        -- unfinished draft: they search from the beginning of the string
        -- and ignore `a_start_index'.
        --
        -- See also `reverse_index_of', `first_index_of', `last_index_of', `has'.
     require
        valid_start_index: a_start_index >= lower and a_start_index <= lower + count
        a_character.is_valid
     local
        location: POINTER; integer_result: INTEGER_32
     do
        not_yet_implemented
        -- Not correct
        location := g_utf8_strchr(handle, -1, a_character.code)
        if location.is_not_null then
           integer_result := g_utf8_pointer_to_offset(handle, location)
           -- Offset 0 is a legitimate hit: the very first character.
           check integer_result >= 0 end
           create Result.set_item(integer_result.to_natural_32)
        end
     ensure
        Result /= Void implies item(Result.item) = a_character
     end
  reverse_index_of (a_character: like item; a_start_index: like lower): REFERENCE[like lower] is
        -- Index of the first occurrence of `a_character' at or before
        -- `a_start_index'; Void if none. The search is meant to run in
        -- reverse direction, from `a_start_index' down to the first
        -- character.
        --
        -- `not_yet_implemented' is called first, so this feature is
        -- presumably unusable for now. The statements after it are an
        -- unfinished draft: they look for the last occurrence in the
        -- whole string and ignore `a_start_index'.
        --
        -- See also `index_of', `last_index_of', `first_index_of'.
     require
        valid_start_index: a_start_index >= lower and a_start_index <= count
        a_character.is_valid
     local
        location: POINTER; integer_result: INTEGER_32
     do
        not_yet_implemented
        -- Not correct
        location := g_utf8_strrchr(handle, -1, a_character.code)
        if location.is_not_null then
           integer_result := g_utf8_pointer_to_offset(handle, location)
           -- Offset 0 is a legitimate hit: the very first character.
           check integer_result >= 0 end
           create Result.set_item(integer_result.to_natural_32)
        end
     ensure
        Result /= Void implies item(Result.item) = a_character
     end
  first_index_of (a_character: like item): REFERENCE[like lower] is
        -- Index of the first occurrence of `a_character'; Void if the
        -- character does not occur.
        --
        -- See also `last_index_of', `index_of', `reverse_index_of'.
     require
        a_character.is_valid
     local
        location: POINTER; integer_result: INTEGER_32
     do
        location := g_utf8_strchr(handle, -1, a_character.code)
        if location.is_not_null then
           -- Turn the address of the hit back into a character offset.
           integer_result := g_utf8_pointer_to_offset(handle, location)
           -- Offset 0 is a legitimate hit: the very first character.
           check integer_result >= 0 end
           create Result.set_item(integer_result.to_natural_32)
        end
     ensure
        definition: Result /= Void implies Result.is_equal(index_of(a_character, lower))
     end
  last_index_of (unicode: like item): REFERENCE[like lower] is
        -- Index of the last occurrence of `unicode'; Void if the
        -- character does not occur.
        -- Implemented like `first_index_of', searching with
        -- `g_utf8_strrchr' instead of `g_utf8_strchr'.
        --
        -- See also `first_index_of', `reverse_index_of', `index_of'.
     local
        location: POINTER; integer_result: INTEGER_32
     do
        location := g_utf8_strrchr(handle, -1, unicode.code)
        if location.is_not_null then
           -- Turn the address of the hit back into a character offset.
           integer_result := g_utf8_pointer_to_offset(handle, location)
           check integer_result >= 0 end
           create Result.set_item(integer_result.to_natural_32)
        end
     ensure
        -- Compared by value: two distinct REFERENCE objects are never
        -- the same object even when they hold the same index.
        definition: Result /= Void implies Result.is_equal(reverse_index_of(unicode, upper))
     end
  has (a_character: like first): BOOLEAN is
        -- True if `a_character' occurs in the string.
        -- Relies on `first_index_of', which yields Void when the
        -- character is absent.
        --
        -- See also `index_of', `occurrences', `has_substring'.
     require
        valid_character: a_character.is_valid
     do
        Result := first_index_of(a_character) /= Void
     end
  has_substring (other: UNICODE_STRING): BOOLEAN is
        -- True if `Current' contains `other', i.e. when
        -- `substring_index', searching from index 1, does not answer 0.
        --
        -- See also `substring_index', `has'.
     require
        other_not_void: other /= Void
     do
        Result := substring_index(other, 1) /= 0
     end
  occurrences (unicode: INTEGER): INTEGER is
        -- Number of times character `unicode' appears in the string.
        -- Hits are counted by calling `index_of' repeatedly until it
        -- answers 0.
        --
        -- See also `remove_all_occurrences', `has'.
     require
        valid_unicode_value: valid_unicode(unicode)
     local
        i: INTEGER
     do
        from
           i := index_of(unicode, 1)
        until
           i = 0
        loop
           Result := Result + 1
           -- Resume just after the hit: `index_of' searches "at or
           -- after" its start index, so restarting at `i' would find
           -- the same occurrence again and never terminate.
           i := index_of(unicode, i + 1)
        end
     ensure
        Result >= 0
     end
  has_suffix (s: UNICODE_STRING): BOOLEAN is
        -- True if `Current' ends with `s'. Each character of `s' is
        -- compared with the character of Current found `count - s.count'
        -- positions further on; a string longer than Current is never a
        -- suffix.
        --
        -- See also `remove_suffix', `has_prefix', `has_substring'.
     require
        s /= Void
     local
        idx, shift: INTEGER
     do
        shift := count - s.count
        Result := shift >= 0
        from
           idx := lower
        until
           not Result or else idx > s.upper
        loop
           Result := item(idx + shift) = s.item(idx)
           idx := idx + 1
        end
     end
  has_prefix (p: UNICODE_STRING): BOOLEAN is
        -- True if `Current' starts with `p'. Characters are compared
        -- position by position until the first difference; a string
        -- longer than Current is never a prefix.
     require
        p /= Void
     local
        idx: INTEGER
     do
        Result := count >= p.count
        from
           idx := lower
        until
           not Result or else idx > p.upper
        loop
           Result := item(idx) = p.item(idx)
           idx := idx + 1
        end
     end
  400. feature {ANY} -- Testing and Conversion:
  is_ascii: BOOLEAN is
        -- True if every unicode value is in range 0..127.
        -- `storage' is scanned from the last position backwards and the
        -- scan stops at the first value with a bit set in mask 0xFF80.
     local
        idx: INTEGER
     do
        from
           idx := count - 1
        until
           idx < 0 or else storage.item(idx) & 0xFF80 /= 0
        loop
           idx := idx - 1
        end
        -- Running off the front means no offending value was met.
        Result := idx < 0
     end
  to_utf8: STRING is
        -- A newly created STRING holding the UTF-8 encoding of Current.
        -- The encoding is produced in `tmp_buffer' and the result is a
        -- `twin' of that buffer.
        --
        -- See also: `utf8_encode_in' and `as_utf8' to save memory.
     do
        tmp_buffer.clear_count
        utf8_encode_in(tmp_buffer)
        Result := tmp_buffer.twin
     end
  to_string: STRING is
        -- Obsolete synonym of `to_utf8'.
     obsolete "Now use `to_utf8' instead (May 2008)."
     do
        Result := to_utf8
     end
  as_utf8: STRING is
        -- The UTF-8 encoding of Current, written into one shared once
        -- STRING: every call empties that same object and refills it, so
        -- the result of an earlier call is overwritten.
        --
        -- See also: `to_utf8', `utf8_encode_in'.
     do
        Result := once ""
        Result.clear_count
        utf8_encode_in(Result)
     end
  as_string: STRING is
        -- Obsolete synonym of `as_utf8'.
     obsolete "Now use `as_utf8' instead (May 2008)."
     do
        Result := as_utf8
     end
  utf8_encode_in (s: STRING) is
        -- Append the UTF-8 encoding of Current to `s'.
        -- Each value is emitted as 1, 2, 3 or 4 bytes depending on
        -- whether it is below 128, 2048, 65536 or above.
        --
        -- See also: `to_utf8', `as_utf8'.
     require
        s /= Void
     local
        idx: INTEGER; code: INTEGER
     do
        from
           idx := 1
        until
           idx > count
        loop
           code := item(idx)
           if code < 128 then
              -- One byte: the value itself.
              s.extend(code.to_character)
           elseif code < 2048 then
              -- Two bytes: lead byte based on 192, then one continuation.
              s.extend((code #// 64 + 192).to_character)
              s.extend((code #\\ 64 + 128).to_character)
           elseif code < 65536 then
              -- Three bytes: lead byte based on 224, two continuations.
              s.extend((code #// 4096 + 224).to_character)
              code := code #\\ 4096
              s.extend((code #// 64 + 128).to_character)
              s.extend((code #\\ 64 + 128).to_character)
           else
              check
                 code < 0x00110000
              end
              -- Four bytes: lead byte based on 240, three continuations.
              s.extend((code #// 0x00040000 + 240).to_character)
              code := code #\\ 0x00040000
              s.extend((code #// 0x00001000 + 128).to_character)
              code := code #\\ 0x00001000
              s.extend((code #// 64 + 128).to_character)
              s.extend((code #\\ 64 + 128).to_character)
           end
           idx := idx + 1
        end
     end
  utf16be_encode_in (s: STRING) is
        -- Append the UTF-16BE encoding of Current to `s'.
        -- Every 16-bit unit of `storage' is written high byte first. A
        -- unit recognised as a high surrogate is followed by two more
        -- bytes built from the matching entry of `low_surrogate_values'.
     require
        s /= Void
     local
        pos, surrogate_rank: INTEGER; unit: INTEGER_16
     do
        from
           pos := 0
           surrogate_rank := 0
        until
           pos >= count
        loop
           unit := storage.item(pos)
           s.extend((unit |>>> 8).to_character)
           s.extend((unit & 0x00FF).to_character)
           if unit & 0xF800 = 0xD800 then
              check
                 low_surrogate_indexes.item(surrogate_rank) = pos + 1
              end
              s.extend((low_surrogate_values.item(surrogate_rank) #// 256 + 220).to_character)
              s.extend((low_surrogate_values.item(surrogate_rank) & 0x00FF).to_character)
              surrogate_rank := surrogate_rank + 1
           end
           pos := pos + 1
        end
     end
  utf8_decode_from (s: STRING): BOOLEAN is
        -- Decode `s' as UTF-8 and append the decoded values to Current
        -- with `extend'. Answers False if any part of `s' could not be
        -- decoded; each such part is replaced by the value 65533.
     require
        s /= Void
     local
        pos, pending, seq_length: INTEGER; code: INTEGER
     do
        from
           Result := True
           pos := 1
        until
           pos > s.count
        loop
           code := s.item(pos).code
           pos := pos + 1
           -- The lead byte tells how long the sequence is; `pending'
           -- counts the bytes of the sequence, lead byte included.
           inspect
              code
           when 0 .. 127 then
              extend(code)
              pending := 0
           when 192 .. 223 then
              code := code - 192
              pending := 2
           when 224 .. 239 then
              code := code - 224
              pending := 3
           when 240 .. 247 then
              code := code - 240
              pending := 4
           else
              -- Stray continuation byte or unsupported lead byte.
              extend(65533)
              Result := False
              pending := 0
           end
           from
              seq_length := pending
           until
              pending <= 1
           loop
              if pos <= s.count and then s.item(pos).code.in_range(128, 191) then
                 -- Fold the 6 payload bits of the continuation byte in.
                 code := code * 64 + s.item(pos).code - 128
                 pos := pos + 1
                 pending := pending - 1
              else
                 -- Truncated sequence; 0 also skips the final check.
                 extend(65533)
                 Result := False
                 pending := 0
              end
           end
           if pending = 1 then
              -- A complete multi-byte sequence has been read.
              if code < 128 or else code < 2048 and then seq_length > 2 or else code < 65536 and then seq_length > 3 then
                 -- Overlong sequence: must be refused by any UTF-8
                 -- compliant decoder for security reasons.
                 extend(65533)
                 Result := False
              elseif not valid_unicode(code) then
                 extend(65533)
                 Result := False
              else
                 extend(code)
              end
           end
        end
     end
  573. feature {}
  from_utf8 (s: STRING) is
        -- Initialize Current from `s', taken as a UTF-8 encoded string.
        -- This feature may be used for manifest strings. The answer of
        -- `utf8_decode_from' is stored but not examined; call that
        -- feature directly when error detection is needed.
     require
        s /= Void
     local
        decode_status: BOOLEAN
     do
        make(s.count)
        decode_status := utf8_decode_from(s)
     end
  586. feature {ANY} -- Modification:
  resize (new_count: INTEGER) is
        -- Change `count' to `new_count'. When `new_count' is greater
        -- than `count', the new positions are initialized with unicode 0.
        -- Low surrogate entries whose index lies beyond `new_count' are
        -- dropped.
     require
        new_count >= 0
     local
        rank: INTEGER
     do
        if new_count > count then
           if capacity < new_count then
              -- Not enough room: get a bigger storage.
              if capacity = 0 then
                 storage := storage.calloc(new_count)
              else
                 storage := storage.realloc(capacity, new_count)
              end
              capacity := new_count
           else
              -- Enough room already: just blank the newly visible slots.
              storage.clear(count, new_count - 1)
           end
        end
        count := new_count
        from
           rank := low_surrogate_indexes.upper
        until
           rank < 0 or else low_surrogate_indexes.item(rank) <= new_count
        loop
           --TODO: only one remove out of the loop
           low_surrogate_indexes.remove_last
           low_surrogate_values.remove_last
           rank := rank - 1
        end
     ensure
        count = new_count
        capacity >= old capacity
     end
  clear_count, wipe_out is
        -- Forget every character, so that `is_empty' holds afterwards.
        -- `capacity' is left as it is: the internal `storage' memory is
        -- neither released nor shrunk. Both low surrogate tables are
        -- reset with `make (0)'.
        --
        -- See also `clear_count_and_capacity'.
     do
        count := 0
        low_surrogate_indexes.make(0)
        low_surrogate_values.make(0)
     ensure
        is_empty: count = 0
        capacity = old capacity
     end
  clear_count_and_capacity is
        -- Discard all characters (`is_empty' is True after that call).
        -- The internal `capacity' may also be reduced after this call.
        -- For now only the low surrogate tables give their memory back:
        -- releasing `storage' is still disabled (see the `--***' lines),
        -- so `capacity' itself is unchanged.
        --
        -- See also `clear_count'.
     do
        low_surrogate_indexes.clear_count_and_capacity
        low_surrogate_values.clear_count_and_capacity
        clear_count
        --*** capacity := 0
        --*** storage := null_storage
     ensure
        is_empty: count = 0
        -- Matches the header comment ("may be reduced"); `capacity = 0'
        -- could not hold while the assignments above stay disabled.
        capacity <= old capacity
     end
  copy (other: like Current) is
        -- Make Current hold the same content as `other': the first
        -- `count' storage cells and both low surrogate tables are
        -- copied. The storage is reallocated only when it is too small.
        --
        -- See also `copy_substring'.
     do
        count := other.count
        if count > 0 then
           if capacity < count then
              storage := storage.calloc(count)
              capacity := count
           end
           storage.copy_from(other.storage, count - 1)
        end
        -- The surrogate tables may not exist yet.
        if low_surrogate_indexes = Void then
           create low_surrogate_indexes.make(0)
           create low_surrogate_values.make(0)
        end
        low_surrogate_indexes.copy(other.low_surrogate_indexes)
        low_surrogate_values.copy(other.low_surrogate_values)
     ensure then
        count = other.count
     end
  copy_substring (s: like Current; start_index, end_index: INTEGER) is
        -- Replace the content of Current with the characters of `s' from
        -- `start_index' to `end_index': Current is emptied, then the
        -- slice is appended.
        --
        -- See also `copy'.
        --|*** DUMB IMPLEMENTATION
     require
        string_not_void: s /= Void
        valid_start_index: 1 <= start_index
        valid_end_index: end_index <= s.count
        meaningful_interval: start_index <= end_index + 1
     do
        clear_count
        append_substring(s, start_index, end_index)
     end
  fill_with (unicode: INTEGER) is
        -- Set every one of the `count' characters to `unicode'.
        -- A value below 65536 is stored directly and both low surrogate
        -- tables are emptied. A larger value is stored as a high
        -- surrogate in `storage', with one low surrogate entry per
        -- position.
     require
        valid_unicode_value: valid_unicode(unicode)
     local
        pos: INTEGER; unit: INTEGER_16; low_part: INTEGER_16
     do
        if unicode < 65536 then
           unit := unicode.low_16
           storage.set_all_with(unit, count - 1)
           low_surrogate_values.resize(0)
           low_surrogate_indexes.resize(0)
        else
           -- stored as high and low surrogate
           unit := (unicode #// 1024 - 64).low_16
           low_part := (unicode & 0x000003FF).to_integer_16 --unicode #\\ 1024
           storage.set_all_with(unit, count - 1)
           low_surrogate_values.resize(count)
           low_surrogate_values.set_all_with(low_part)
           low_surrogate_indexes.resize(count)
           from
              pos := count - 1
           until
              pos < 0
           loop
              -- Entry `pos' records the index `pos + 1'.
              low_surrogate_indexes.put(pos + 1, pos)
              pos := pos - 1
           end
        end
     ensure
        occurrences(unicode) = count
     end
  replace_all (old_code, new_code: like item) is
        -- Put `new_code' wherever `old_code' occurs. Nothing is done
        -- when both values are the same.
     require
        valid_unicode_value: valid_unicode(old_code)
        valid_unicode_value: valid_unicode(new_code)
     local
        hit: INTEGER
     do
        --*** May be implemented in a more efficient way...
        if old_code /= new_code then
           from
              hit := index_of(old_code, 1)
           until
              hit = 0
           loop
              put(new_code, hit)
              -- Carry on right after the position just replaced.
              hit := index_of(old_code, hit + 1)
           end
        end
     ensure
        count = old count
        old_code /= new_code implies occurrences(old_code) = 0
     end
  append, append_string (s: UNICODE_STRING) is
        -- Append a copy of `s' to `Current'.
        -- The storage grows when needed, to at least twice its former
        -- capacity. The low surrogate entries of `s' are added with
        -- their indexes shifted by the former `count'.
        --
        -- See also `add_last', `add_first', `prepend', '+'.
     require
        s_not_void: s /= Void
     local
        s_count, needed_capacity, new_capacity, rank: INTEGER; indexes: FAST_ARRAY[INTEGER]
     do
        s_count := s.count
        needed_capacity := count + s_count
        if needed_capacity > capacity then
           if capacity = 0 then
              new_capacity := needed_capacity
              storage := storage.calloc(new_capacity)
           else
              new_capacity := (2 * capacity).max(needed_capacity)
              storage := storage.realloc(capacity, new_capacity)
           end
           capacity := new_capacity
        end
        storage.copy_at(count, s.storage, s_count)
        indexes := s.low_surrogate_indexes
        from
           rank := 0
        until
           rank > indexes.upper
        loop
           low_surrogate_indexes.add_last(indexes.item(rank) + count)
           low_surrogate_values.add_last(s.low_surrogate_values.item(rank))
           rank := rank + 1
        end
        count := needed_capacity
     end
  append_substring (s: like Current; start_index, end_index: INTEGER) is
        -- Append to Current the characters of `s' from `start_index' to
        -- `end_index', one `extend' call per character.
        --|*** DUMB IMPLEMENTATION
     require
        string_not_void: s /= Void
        valid_start_index: 1 <= start_index
        valid_end_index: end_index <= s.count
        meaningful_interval: start_index <= end_index + 1
     local
        pos: INTEGER
     do
        from
           pos := start_index
        until
           pos > end_index
        loop
           extend(s.item(pos))
           pos := pos + 1
        end
     end
  prepend (other: UNICODE_STRING) is
        -- Put `other' in front of the content of `Current'.
        -- Existing characters and surrogate indexes are shifted by
        -- `other.count', then the storage and surrogate entries of
        -- `other' are copied in at the front.
        --
        -- See also `append'.
     require
        other /= Void
     local
        own_count, other_count, rank: INTEGER
     do
        own_count := count
        other_count := other.count
        resize(own_count + other_count)
        if own_count > 0 and then other_count > 0 then
           -- Make room at the front and renumber our own surrogates.
           storage.move(0, own_count - 1, other_count)
           from
              rank := low_surrogate_indexes.upper
           until
              rank < 0
           loop
              low_surrogate_indexes.put(low_surrogate_indexes.item(rank) + other_count, rank)
              rank := rank - 1
           end
        end
        -- May be implemented in a more efficient way...
        from
           rank := other.low_surrogate_indexes.upper
        until
           rank < 0
        loop
           low_surrogate_indexes.add_first(other.low_surrogate_indexes.item(rank))
           low_surrogate_values.add_first(other.low_surrogate_values.item(rank))
           rank := rank - 1
        end
        storage.copy_from(other.storage, other_count - 1)
     ensure
        (old other.twin + old Current.twin).is_equal(Current)
     end
  insert_string (s: UNICODE_STRING; i: INTEGER) is
        -- Insert `s' at index `i', shifting the characters from index
        -- `i' to `count' rightwards.
        -- NOTE(review): the first surrogate loop runs once per surrogate
        -- of `s', not once per existing surrogate after `i' -- confirm
        -- that this is intended.
     require
        string_not_void: s /= Void
        valid_insertion_index: 1 <= i and i <= count + 1
     local
        old_count, inserted, slot: INTEGER; pos, n: INTEGER
     do
        old_count := count
        inserted := s.count
        resize(old_count + inserted)
        if i <= old_count then
           -- Open a gap of `inserted' cells at the insertion point.
           storage.move(i - 1, old_count - 1, inserted)
        end
        storage.copy_at(i - 1, s.storage, inserted)
        pos := low_surrogate_position(i)
        slot := low_surrogate_indexes.count + s.low_surrogate_indexes.count
        low_surrogate_indexes.resize(slot)
        low_surrogate_values.resize(slot)
        from
           -- move existing surrogates and adjust indexes
           n := s.low_surrogate_indexes.upper
        until
           n < 0
        loop
           slot := slot - 1
           low_surrogate_indexes.put(low_surrogate_indexes.item(pos + n) + inserted, slot)
           low_surrogate_values.put(low_surrogate_values.item(pos + n), slot)
           n := n - 1
        end
        from
           -- copy surrogates from s and adjust indexes
           n := s.low_surrogate_indexes.upper
           slot := pos + n
        until
           n < 0
        loop
           low_surrogate_indexes.put(s.low_surrogate_indexes.item(n) + i, slot)
           low_surrogate_values.put(s.low_surrogate_values.item(n), slot)
           slot := slot - 1
           n := n - 1
        end
     end
  replace_substring (s: UNICODE_STRING; start_index, end_index: INTEGER) is
        -- Substitute `s' for the characters from `start_index' to
        -- `end_index', inclusive: the slice is removed first, then `s'
        -- is inserted at `start_index'.
     require
        string_not_void: s /= Void
        valid_start_index: 1 <= start_index
        valid_end_index: end_index <= count
        meaningful_interval: start_index <= end_index + 1
     do
        -- May be implemented in a more efficient way...
        remove_between(start_index, end_index)
        insert_string(s, start_index)
     end
  infix "+" (other: UNICODE_STRING): like Current is
        -- A new string made of the content of `Current' followed by the
        -- content of `other'. The result is created with room for both
        -- and then filled with two `append' calls.
        --
        -- See also `append'.
     require
        other_exists: other /= Void
     do
        create Result.make(count + other.count)
        Result.append(Current)
        Result.append(other)
     ensure
        result_count: Result.count = count + other.count
     end
  put (unicode: INTEGER; i: INTEGER) is
        -- Store `unicode' at position `i'.
        -- A value of 65536 or more is stored as a high surrogate in
        -- `storage' plus an entry in the low surrogate tables; that
        -- entry is created, updated or removed depending on what was
        -- stored at `i' before.
        --
        -- See also `item', `lower', `upper', `swap'.
     require
        valid_index: valid_index(i)
        valid_unicode_value: valid_unicode(unicode)
     local
        high_part, slot: INTEGER; was_surrogate: BOOLEAN; low_part: INTEGER_16
     do
        was_surrogate := storage.item(i - 1) & 0xF800 = 0xD800
        if unicode >= 65536 then
           -- stored as high and low surrogate
           high_part := unicode #// 1024 - 64
           low_part := (unicode & 0x000003FF).to_integer_16
           if was_surrogate then
              -- An entry already exists for `i': overwrite its value.
              low_surrogate_values.put(low_part, low_surrogate_index(i))
           else
              -- Grow both tables by one and open a slot at the right rank.
              slot := low_surrogate_position(i)
              low_surrogate_indexes.add_last(0)
              low_surrogate_values.add_last(0)
              if slot /= low_surrogate_indexes.upper then
                 low_surrogate_indexes.move(slot, low_surrogate_indexes.upper - 1, 1)
                 low_surrogate_values.move(slot, low_surrogate_values.upper - 1, 1)
              end
              low_surrogate_indexes.put(i, slot)
              low_surrogate_values.put(low_part, slot)
           end
           storage.put(high_part.low_16, i - 1)
        else
           if was_surrogate then
              -- The old character had a low surrogate entry: drop it.
              slot := low_surrogate_index(i)
              low_surrogate_indexes.remove(slot)
              low_surrogate_values.remove(slot)
           end
           storage.put(unicode.low_16, i - 1)
        end
     ensure
        item(i) = unicode
     end
  swap (i1, i2: INTEGER) is
        -- Swap the characters at positions `i1' and `i2'.
        -- The two `storage' cells are exchanged. When at least one of
        -- them is a high surrogate, the low surrogate tables are updated
        -- first.
        -- NOTE(review): only the order in which `low_tmp' is read has
        -- been corrected here; the index bookkeeping of the surrogate
        -- tables is left as found and deserves a separate check.
        --
        -- See also `item', `put'.
     require
        valid_index(i1)
        valid_index(i2)
     local
        tmp: INTEGER_16; j1, j2: INTEGER; low_tmp: INTEGER_16; k1, k2: INTEGER
     do
        j1 := i1 - 1
        j2 := i2 - 1
        tmp := storage.item(j1)
        if tmp & 0xF800 = 0xD800 then
           if storage.item(j2) & 0xF800 = 0xD800 then
              -- Both characters have a low surrogate entry.
              k1 := low_surrogate_index(i1)
              k2 := low_surrogate_index(i2)
              low_tmp := low_surrogate_values.item(k1)
              low_surrogate_values.put(low_surrogate_values.item(k2), k1)
              low_surrogate_values.put(low_tmp, k2)
              low_surrogate_indexes.put(i2, k1)
              low_surrogate_indexes.put(i1, k2)
           else
              -- Only `i1' has a low surrogate entry. Locate it first:
              -- `k1' must be set before the value is read through it.
              k1 := low_surrogate_index(i1)
              low_tmp := low_surrogate_values.item(k1)
              k2 := low_surrogate_position(i2)
              if k2 > k1 + 1 then
                 low_surrogate_indexes.move(k1 + 1, k2 - 1, -1)
                 low_surrogate_values.move(k1 + 1, k2 - 1, -1)
                 k2 := k2 - 1
              elseif k1 > k2 then
                 low_surrogate_indexes.move(k2, k1 - 1, 1)
                 low_surrogate_values.move(k2, k1 - 1, 1)
                 --else no move
              end
              low_surrogate_indexes.put(i1, k2)
              low_surrogate_values.put(low_tmp, k2)
           end
        else
           if storage.item(j2) & 0xF800 = 0xD800 then
              -- Only `i2' has a low surrogate entry. Locate it first:
              -- `k2' must be set before the value is read through it.
              k1 := low_surrogate_position(i1)
              k2 := low_surrogate_index(i2)
              low_tmp := low_surrogate_values.item(k2)
              if k1 > k2 + 1 then
                 low_surrogate_indexes.move(k2 + 1, k1 - 1, -1)
                 low_surrogate_values.move(k2 + 1, k1 - 1, -1)
                 k1 := k1 - 1
              elseif k2 > k1 then
                 low_surrogate_indexes.move(k1, k2 - 1, 1)
                 low_surrogate_values.move(k1, k2 - 1, 1)
                 --else no move
              end
              low_surrogate_indexes.put(i2, k1)
              low_surrogate_values.put(low_tmp, k1)
              -- else i1 and i2 are not surrogate
           end
        end
        storage.put(storage.item(j2), j1)
        storage.put(tmp, j2)
     ensure
        item(i1) = old item(i2)
        item(i2) = old item(i1)
     end
  1005. insert_character (unicode: INTEGER; i: INTEGER) is
  1006. -- Inserts `unicode' at index `i', shifting characters from
  1007. -- position 'i' to `count' rightwards.
  1008. require
  1009. valid_insertion_index: 1 <= i and i <= count + 1
  1010. valid_unicode_value: valid_unicode(unicode)
  1011. local
  1012. j, k: INTEGER
  1013. do
  1014. k := low_surrogate_position(i)
  1015. from
  1016. j := low_surrogate_indexes.upper
  1017. until
  1018. j < k
  1019. loop
  1020. low_surrogate_indexes.put(low_surrogate_indexes.item(j) + 1, j)
  1021. j := j - 1
  1022. end
  1023. resize(count + 1)
  1024. if count > 1 then
  1025. storage.move(i - 1, count - 2, 1)
  1026. storage.put(0, i - 1)
  1027. end
  1028. put(unicode, i)
  1029. ensure
  1030. item(i) = unicode
  1031. end
  1032. shrink (min_index, max_index: INTEGER) is
  1033. -- Keep only the slice [`min_index' .. `max_index'] or nothing
  1034. -- when the slice is empty.
  1035. require
  1036. 1 <= min_index
  1037. max_index <= count
  1038. min_index <= max_index + 1
  1039. local
  1040. i, j: INTEGER
  1041. do
  1042. if max_index < min_index then
  1043. count := 0
  1044. low_surrogate_indexes.make(0)
  1045. low_surrogate_values.make(0)
  1046. elseif min_index = 1 then
  1047. count := max_index
  1048. i := low_surrogate_position(count)
  1049. if i <= low_surrogate_indexes.upper then
  1050. if low_surrogate_indexes.item(i) = max_index then
  1051. i := i + 1
  1052. end
  1053. end
  1054. low_surrogate_indexes.resize(i)
  1055. low_surrogate_values.resize(i)
  1056. else
  1057. storage.slice_copy(0, storage, min_index - 1, max_index - 1)
  1058. from
  1059. i := low_surrogate_position(min_index)
  1060. until
  1061. i > low_surrogate_indexes.upper or else low_surrogate_indexes.item(i) > max_index
  1062. loop
  1063. low_surrogate_indexes.put(low_surrogate_indexes.item(i) - min_index + 1, j)
  1064. low_surrogate_values.put(low_surrogate_values.item(i), j)
  1065. j := j + 1
  1066. i := i + 1
  1067. end
  1068. low_surrogate_indexes.resize(j)
  1069. low_surrogate_values.resize(j)
  1070. count := max_index - min_index + 1
  1071. end
  1072. ensure
  1073. count = max_index - min_index + 1
  1074. end
  1075. remove (i: INTEGER) is
  1076. -- Remove character at position `i'.
  1077. --
  1078. -- See also `remove_head', `remove_between', `remove_suffix', `remove_prefix'.
  1079. require
  1080. valid_removal_index: valid_index(i)
  1081. do
  1082. remove_between(i, i)
  1083. ensure
  1084. count = old count - 1
  1085. end
  1086. add_first, precede (unicode: INTEGER) is
  1087. -- Add `unicode' at first position.
  1088. --
  1089. -- See also `add_last'.
  1090. require
  1091. valid_unicode_value: valid_unicode(unicode)
  1092. local
  1093. i: INTEGER
  1094. do
  1095. from
  1096. i := low_surrogate_indexes.upper
  1097. until
  1098. i < 0
  1099. loop
  1100. low_surrogate_indexes.put(low_surrogate_indexes.item(i) + 1, i)
  1101. i := i - 1
  1102. end
  1103. resize(count + 1)
  1104. if count > 1 then
  1105. storage.move(0, count - 2, 1)
  1106. storage.put(0, 0)
  1107. end
  1108. put(unicode, 1)
  1109. ensure
  1110. count = 1 + old count
  1111. item(1) = unicode
  1112. end
  1113. add_last, append_character, extend (unicode: INTEGER) is
  1114. -- Append `unicode' to string.
  1115. --
  1116. -- See also `add_first'.
  1117. require
  1118. valid_unicode_value: valid_unicode(unicode)
  1119. local
  1120. new_capacity: INTEGER
  1121. do
  1122. if capacity > count then
  1123. elseif capacity = 0 then
  1124. new_capacity := 32
  1125. storage := storage.calloc(new_capacity)
  1126. capacity := new_capacity
  1127. else
  1128. new_capacity := 2 * capacity
  1129. storage := storage.realloc(capacity, new_capacity)
  1130. capacity := new_capacity
  1131. end
  1132. if unicode >= 65536 then
  1133. -- stored as high and low surrogate
  1134. low_surrogate_indexes.add_last(count)
  1135. low_surrogate_values.add_last((unicode & 0x000003FF).to_integer_16)
  1136. storage.put((unicode #// 1024 - 64).low_16, count)
  1137. else
  1138. storage.put(unicode.low_16, count)
  1139. end
  1140. count := count + 1
  1141. ensure
  1142. count = 1 + old count
  1143. item(count) = unicode
  1144. end
  1145. to_lower is
  1146. -- Convert all characters to lower case.
  1147. --
  1148. -- See also `to_upper', `as_lower', `as_upper'.
  1149. do
  1150. not_yet_implemented
  1151. end
  1152. to_upper is
  1153. -- Convert all characters to upper case.
  1154. --
  1155. -- See also `to_lower', `as_upper', `as_lower'.
  1156. do
  1157. not_yet_implemented
  1158. end
  1159. as_lower: like Current is
  1160. -- New object with all letters in lower case.
  1161. --
  1162. -- See also `as_upper', `to_lower', `to_upper'.
  1163. do
  1164. create Result.copy(Current)
  1165. Result.to_lower
  1166. end
  1167. as_upper: like Current is
  1168. -- New object with all letters in upper case.
  1169. --
  1170. -- See also `as_lower', `to_upper', `to_lower'.
  1171. do
  1172. create Result.copy(Current)
  1173. Result.to_upper
  1174. end
  1175. keep_head (n: INTEGER) is
  1176. -- Remove all characters except for the first `n'.
  1177. -- Do nothing if `n' >= `count'.
  1178. --
  1179. -- See also `keep_tail', `remove_head', `remove_tail'.
  1180. require
  1181. n_non_negative: n >= 0
  1182. do
  1183. if n < count then
  1184. remove_tail(count - n)
  1185. end
  1186. ensure
  1187. count = n.min(old count)
  1188. end
  1189. keep_tail (n: INTEGER) is
  1190. -- Remove all characters except for the last `n'.
  1191. -- Do nothing if `n' >= `count'.
  1192. --
  1193. -- See also `keep_head', `remove_tail', `remove_head'.
  1194. require
  1195. n_non_negative: n >= 0
  1196. do
  1197. if n < count then
  1198. remove_head(count - n)
  1199. end
  1200. ensure
  1201. count = n.min(old count)
  1202. end
  1203. remove_first is
  1204. -- Remove the `first' item.
  1205. --
  1206. -- See also `remove_head', `remove_last', `remove'.
  1207. require
  1208. not is_empty
  1209. do
  1210. --*** May be improved?
  1211. remove_between(1, 1)
  1212. ensure
  1213. count = old count - 1
  1214. end
  1215. remove_head (n: INTEGER) is
  1216. -- Remove `n' first characters. If `n' >= `count', remove all.
  1217. --
  1218. -- See also `remove_tail', `remove', `remove_the_first'.
  1219. require
  1220. n_non_negative: n >= 0
  1221. do
  1222. if n > count then
  1223. count := 0
  1224. low_surrogate_indexes.make(0)
  1225. low_surrogate_values.make(0)
  1226. else
  1227. if n > 0 then
  1228. remove_between(1, n)
  1229. end
  1230. end
  1231. ensure
  1232. count = (old count - n).max(0)
  1233. end
  1234. remove_last is
  1235. -- Remove the `last' item.
  1236. --
  1237. -- See also `remove_tail', `remove_first', `remove'.
  1238. require
  1239. not is_empty
  1240. do
  1241. --*** May be improved
  1242. remove_tail(1)
  1243. ensure
  1244. count = old count - 1
  1245. end
  1246. remove_tail (n: INTEGER) is
  1247. -- Remove `n' last characters. If `n' >= `count', remove all.
  1248. --
  1249. -- See also `remove_head', `remove', `remove_the_last'.
  1250. require
  1251. n_non_negative: n >= 0
  1252. local
  1253. i: INTEGER
  1254. do
  1255. if n > count then
  1256. count := 0
  1257. low_surrogate_indexes.make(0)
  1258. low_surrogate_values.make(0)
  1259. else
  1260. count := count - n
  1261. i := low_surrogate_position(count + 1)
  1262. low_surrogate_indexes.resize(i)
  1263. low_surrogate_values.resize(i)
  1264. end
  1265. ensure
  1266. count = (old count - n).max(0)
  1267. end
  1268. remove_substring, remove_between (start_index, end_index: INTEGER) is
  1269. -- Remove all characters from `strt_index' to `end_index' inclusive.
  1270. require
  1271. valid_start_index: 1 <= start_index
  1272. valid_end_index: end_index <= count
  1273. meaningful_interval: start_index <= end_index + 1
  1274. local
  1275. i, k, len: INTEGER
  1276. do
  1277. len := end_index - start_index + 1
  1278. if len > 0 then
  1279. from
  1280. i := low_surrogate_position(start_index)
  1281. k := low_surrogate_position(end_index + 1)
  1282. until
  1283. k > low_surrogate_indexes.upper
  1284. loop
  1285. low_surrogate_indexes.put(low_surrogate_indexes.item(k) - len, i)
  1286. low_surrogate_values.put(low_surrogate_values.item(k), i)
  1287. k := k + 1
  1288. i := i + 1
  1289. end
  1290. low_surrogate_indexes.resize(i)
  1291. low_surrogate_values.resize(i)
  1292. storage.slice_copy(start_index - 1, storage, end_index, count - 1)
  1293. count := count - len
  1294. end
  1295. ensure
  1296. count = old count - (end_index - start_index + 1)
  1297. end
  1298. remove_suffix (s: UNICODE_STRING) is
  1299. -- Remove the suffix `s' of current string.
  1300. --
  1301. -- See also `remove_prefix', `remove_tail', `remove'.
  1302. require
  1303. has_suffix(s)
  1304. do
  1305. not_yet_implemented
  1306. -- remove_last(s.count); equal sequence may have different size
  1307. ensure
  1308. (old Current.twin).is_equal(Current + old s.twin)
  1309. end
  1310. remove_prefix (s: UNICODE_STRING) is
  1311. -- Remove the prefix `s' of current string.
  1312. --
  1313. -- See also `remove_suffix', `remove_head', `remove'.
  1314. require
  1315. has_prefix(s)
  1316. do
  1317. not_yet_implemented
  1318. -- remove_head(s.count); equal sequence may have different size
  1319. ensure
  1320. (old Current.twin).is_equal(old s.twin + Current)
  1321. end
  1322. left_adjust is
  1323. -- Remove leading blanks.
  1324. --
  1325. -- See also `remove_head', `first'.
  1326. local
  1327. i: INTEGER
  1328. do
  1329. from
  1330. i := 1
  1331. until
  1332. i > count or else not is_space(item(i -- not_yet_implemented -- handle combining characters
  1333. ))
  1334. loop
  1335. i := i + 1
  1336. end
  1337. remove_head(i - 1)
  1338. ensure
  1339. -- not_yet_implemented -- handle combining characters
  1340. stripped: is_empty or else not is_space(first)
  1341. end
  1342. right_adjust is
  1343. -- Remove trailing blanks.
  1344. --
  1345. -- See also `remove_tail', `last'.
  1346. local
  1347. i: INTEGER
  1348. do
  1349. from
  1350. until
  1351. count = 0 or else not is_space(item(count -- not_yet_implemented -- handle combining characters
  1352. ))
  1353. loop
  1354. count := count - 1
  1355. end
  1356. i := low_surrogate_position(count + 1)
  1357. low_surrogate_indexes.resize(i)
  1358. low_surrogate_values.resize(i)
  1359. ensure
  1360. -- not_yet_implemented -- handle combining characters
  1361. stripped: is_empty or else not is_space(last)
  1362. end
  1363. feature {ANY} -- Printing:
  1364. out_in_tagged_out_memory is
  1365. do
  1366. utf8_encode_in(tagged_out_memory)
  1367. end
  1368. fill_tagged_out_memory is
  1369. do
  1370. tagged_out_memory.append(once "count: ")
  1371. count.append_in(tagged_out_memory)
  1372. tagged_out_memory.append(once "capacity: ")
  1373. capacity.append_in(tagged_out_memory)
  1374. tagged_out_memory.append(once "storage: %"")
  1375. utf8_encode_in(tagged_out_memory)
  1376. tagged_out_memory.append_character('%"')
  1377. end
  1378. feature {ANY} -- Other features:
  1379. first: UNICODE_CHARACTER is
  1380. -- The first character.
  1381. --
  1382. -- See also `last', `item'.
  1383. do
  1384. Result.set(g_utf8_get_char(handle))
  1385. end
  1386. last: INTEGER is
  1387. -- The last character.
  1388. --
  1389. -- See also `first', `item'.
  1390. do
  1391. end
  1392. substring (a_start_index, an_end_index: like lower): like Current is
  1393. -- New string consisting of items [`start_index'.. `end_index'].
  1394. --
  1395. -- See also `substring_index' and `copy_substring' to save memory.
  1396. require
  1397. valid_start_index: 1 <= start_index
  1398. valid_end_index: end_index <= count
  1399. meaningful_interval: start_index <= end_index + 1
  1400. local
  1401. location, end_point: POINTER
  1402. do
  1403. create Result.with_capacity(end_index - start_index + 1)
  1404. from location:=g_utf8_offset_to_pointer(handle,a_start_index.to_integer_32)
  1405. until
  1406. loop
  1407. end
  1408. ensure
  1409. substring_count: Result.count = end_index - start_index + 1
  1410. end
  1411. extend_multiple (unicode: INTEGER; n: INTEGER) is
  1412. -- Extend Current with `n' times character `unicode'.
  1413. require
  1414. n >= 0
  1415. valid_unicode_value: valid_unicode(unicode)
  1416. local
  1417. i: INTEGER
  1418. do
  1419. from
  1420. i := n
  1421. until
  1422. i = 0
  1423. loop
  1424. append_character(unicode)
  1425. i := i - 1
  1426. end
  1427. ensure
  1428. count = n + old count
  1429. end
  1430. precede_multiple (unicode: INTEGER; n: INTEGER) is
  1431. -- Prepend `n' times character `unicode' to Current.
  1432. require
  1433. n >= 0
  1434. valid_unicode_value: valid_unicode(unicode)
  1435. local
  1436. i: INTEGER
  1437. do
  1438. if n > 0 then
  1439. if count = 0 then
  1440. extend_multiple(unicode, n)
  1441. else
  1442. --|*** May be implemented in a more efficient way...
  1443. from
  1444. i := n
  1445. until
  1446. i = 0
  1447. loop
  1448. precede(unicode)
  1449. i := i - 1
  1450. end
  1451. end
  1452. end
  1453. ensure
  1454. count = n + old count
  1455. end
  1456. extend_to_count (unicode: INTEGER; needed_count: INTEGER) is
  1457. -- Extend Current with `unicode' until `needed_count' is reached.
  1458. -- Do nothing if `needed_count' is already greater or equal
  1459. -- to `count'.
  1460. require
  1461. needed_count >= 0
  1462. valid_unicode_value: valid_unicode(unicode)
  1463. do
  1464. if needed_count > count then
  1465. extend_multiple(unicode, needed_count - count)
  1466. end
  1467. ensure
  1468. count >= needed_count
  1469. end
  1470. precede_to_count (unicode: INTEGER; needed_count: INTEGER) is
  1471. -- Prepend `unicode' to Current until `needed_count' is reached.
  1472. -- Do nothing if `needed_count' is already greater or equal
  1473. -- to `count'.
  1474. require
  1475. needed_count >= 0
  1476. valid_unicode_value: valid_unicode(unicode)
  1477. do
  1478. if needed_count > count then
  1479. precede_multiple(unicode, needed_count - count)
  1480. end
  1481. ensure
  1482. count >= needed_count
  1483. end
  1484. reverse is
  1485. -- Reverse the string.
  1486. local
  1487. i1, i2: INTEGER
  1488. do
  1489. not_yet_implemented
  1490. --|*** reverse grapheme
  1491. from
  1492. i1 := 1
  1493. i2 := count
  1494. until
  1495. i1 >= i2
  1496. loop
  1497. swap(i1, i2)
  1498. i1 := i1 + 1
  1499. i2 := i2 - 1
  1500. end
  1501. end
  1502. remove_all_occurrences (unicode: INTEGER) is
  1503. -- Remove all occurrences of `unicode'.
  1504. --
  1505. -- See also `occurrences', `remove'.
  1506. require
  1507. valid_unicode_value: valid_unicode(unicode)
  1508. local
  1509. i: INTEGER
  1510. do
  1511. --|*** May be implemented in a more efficient way...
  1512. from
  1513. i := index_of(unicode, 1)
  1514. until
  1515. i = 0
  1516. loop
  1517. remove(i)
  1518. i := index_of(unicode, i)
  1519. end
  1520. ensure
  1521. count = old count - old occurrences(unicode)
  1522. end
  1523. substring_index (other: UNICODE_STRING; start_index: INTEGER): INTEGER is
  1524. -- Position of first occurrence of `other' at or after `start', 0 if none.
  1525. --
  1526. -- See also `substring', `first_substring_index'.
  1527. require
  1528. other_not_void: other /= Void
  1529. valid_start_index: start_index >= 1 and start_index <= count + 1
  1530. do
  1531. not_yet_implemented
  1532. end
  1533. first_substring_index (other: UNICODE_STRING): INTEGER is
  1534. -- Position of first occurrence of `other' at or after 1, 0 if none.
  1535. --
  1536. -- See also `substring_index'.
  1537. require
  1538. other_not_void: other /= Void
  1539. do
  1540. Result := substring_index(other, 1)
  1541. ensure
  1542. definition: Result = substring_index(other, 1)
  1543. end
  1544. feature {ANY} -- Splitting a STRING:
  1545. split: ARRAY[UTF8_STRING] is
  1546. -- Split the string into an array of words. Uses `is_separator'
  1547. -- to find words. Gives Void or a non empty array.
  1548. --
  1549. -- See also `split_in'.
  1550. do
  1551. if count > 0 then
  1552. split_buffer.clear_count
  1553. split_in(split_buffer)
  1554. if not split_buffer.is_empty then
  1555. Result := split_buffer.twin
  1556. end
  1557. end
  1558. ensure
  1559. Result /= Void implies not Result.is_empty
  1560. end
  1561. split_in (words: COLLECTION[UTF_STRING]) is
  1562. -- Same jobs as `split' but result is appended in `words'.
  1563. --
  1564. -- See also `split'.
  1565. require
  1566. words /= Void
  1567. do
  1568. ensure
  1569. words.count >= old words.count
  1570. end
  1571. get_new_iterator: ITERATOR[INTEGER] is
  1572. do
  1573. create {ITERATOR_ON_UNICODE_STRING} Result.make(Current)
  1574. end
  1575. feature {} -- Implementation
  1576. split_buffer: ARRAY[UTF8_STRING] is
  1577. once
  1578. create Result.with_capacity(4, 1)
  1579. end
  1580. feature {}
  1581. -- TODO: In UNICODE_STRING we have "manifest_initialize (c: like capacity; s: like storage; ls_cap: INTEGER; lsv: NATIVE_ARRAY[INTEGER_16]; lsi: NATIVE_ARRAY[INTEGER]) is -- This function is a compiler-hook automatically called when -- a manifest unicode string (i.e. U"foo") is used in the Eiffel -- source code." Provide an UTF8 equivalent
  1582. -- g_utf8_next_char()
  1583. -- #define g_utf8_next_char(p)
  1584. -- Skips to the next character in a UTF-8 string. The string must be valid; this
  1585. -- macro is as fast as possible, and has no error-checking. You would use this macro
  1586. -- to iterate over a string character by character. The macro returns the start of
  1587. -- the next UTF-8 character. Before using this macro, use g_utf8_validate() to
  1588. -- validate strings that may contain invalid UTF-8.
  1589. -- p : Pointer to the start of a valid UTF-8 character.
  1590. -- ---------------------------------------------------------------------------------
  1591. -- g_utf8_get_char ()
  1592. -- gunichar g_utf8_get_char (const gchar *p);
  1593. -- Converts a sequence of bytes encoded as UTF-8 to a Unicode character. If p does
  1594. -- not point to a valid UTF-8 encoded character, results are undefined. If you are
  1595. -- not sure that the bytes are complete valid Unicode characters, you should use
  1596. -- g_utf8_get_char_validated() instead.
  1597. -- p : a pointer to Unicode character encoded as UTF-8
  1598. -- Returns : the resulting character
  1599. -- ---------------------------------------------------------------------------------
  1600. -- g_utf8_get_char_validated ()
  1601. -- gunichar g_utf8_get_char_validated (const gchar *p,
  1602. -- gssize max_len);
  1603. -- Convert a sequence of bytes encoded as UTF-8 to a Unicode character. This
  1604. -- function checks for incomplete characters, for invalid characters such as
  1605. -- characters that are out of the range of Unicode, and for overlong encodings of
  1606. -- valid characters.
  1607. -- p : a pointer to Unicode character encoded as UTF-8
  1608. -- max_len : the maximum number of bytes to read, or -1, for no maximum.
  1609. -- Returns : the resulting character. If p points to a partial sequence at the end
  1610. -- of a string that could begin a valid character, returns (gunichar)-2;
  1611. -- otherwise, if p does not point to a valid UTF-8 encoded Unicode
  1612. -- character, returns (gunichar)-1.
  1613. -- ---------------------------------------------------------------------------------
  1614. -- g_utf8_offset_to_pointer ()
  1615. -- gchar* g_utf8_offset_to_pointer (const gchar *str,
  1616. -- glong offset);
  1617. -- Converts from an integer character offset to a pointer to a position within the
  1618. -- string.
  1619. -- Since 2.10, this function allows to pass a negative offset to step backwards. It
  1620. -- is usually worth stepping backwards from the end instead of forwards if offset is
  1621. -- in the last fourth of the string, since moving forward is about 3 times faster
  1622. -- than moving backward.
  1623. -- str : a UTF-8 encoded string
  1624. -- offset : a character offset within str
  1625. -- Returns : the resulting pointer
  1626. -- ---------------------------------------------------------------------------------
  1627. -- g_utf8_pointer_to_offset ()
  1628. -- glong g_utf8_pointer_to_offset (const gchar *str,
  1629. -- const gchar *pos);
  1630. -- Converts from a pointer to a position within a string to an integer character
  1631. -- offset.
  1632. -- Since 2.10, this function allows pos to be before str, and returns a negative
  1633. -- offset in this case.
  1634. -- str : a UTF-8 encoded string
  1635. -- pos : a pointer to a position within str
  1636. -- Returns : the resulting character offset
  1637. -- ---------------------------------------------------------------------------------
  1638. -- g_utf8_prev_char ()
  1639. -- gchar* g_utf8_prev_char (const gchar *p);
  1640. -- Finds the previous UTF-8 character in the string before p.
  1641. -- p does not have to be at the beginning of a UTF-8 character. No check is made to
  1642. -- see if the character found is actually valid other than it starts with an
  1643. -- appropriate byte. If p might be the first character of the string, you must use
  1644. -- g_utf8_find_prev_char() instead.
  1645. -- p : a pointer to a position within a UTF-8 encoded string
  1646. -- Returns : a pointer to the found character.
  1647. -- ---------------------------------------------------------------------------------
  1648. -- g_utf8_find_next_char ()
  1649. -- gchar* g_utf8_find_next_char (const gchar *p,
  1650. -- const gchar *end);
  1651. -- Finds the start of the next UTF-8 character in the string after p.
  1652. -- p does not have to be at the beginning of a UTF-8 character. No check is made to
  1653. -- see if the character found is actually valid other than it starts with an
  1654. -- appropriate byte.
  1655. -- p : a pointer to a position within a UTF-8 encoded string
  1656. -- end : a pointer to the end of the string, or NULL to indicate that the string
  1657. -- is nul-terminated
  1658. -- Returns : a pointer to the found character or NULL
  1659. -- ---------------------------------------------------------------------------------
  1660. -- g_utf8_find_prev_char ()
  1661. -- gchar* g_utf8_find_prev_char (const gchar *str,
  1662. -- const gchar *p);
  1663. -- Given a position p with a UTF-8 encoded string str, find the start of the
  1664. -- previous UTF-8 character starting before p. Returns NULL if no UTF-8 characters
  1665. -- are present in str before p.
  1666. -- p does not have to be at the beginning of a UTF-8 character. No check is made to
  1667. -- see if the character found is actually valid other than it starts with an
  1668. -- appropriate byte.
  1669. -- str : pointer to the beginning of a UTF-8 encoded string
  1670. -- p : pointer to some position within str
  1671. -- Returns : a pointer to the found character or NULL.
  1672. -- ---------------------------------------------------------------------------------
  1673. -- g_utf8_strlen ()
  1674. -- glong g_utf8_strlen (const gchar *p,
  1675. -- gssize max);
  1676. -- Returns the length of the string in characters.
  1677. -- p : pointer to the start of a UTF-8 encoded string.
  1678. -- max : the maximum number of bytes to examine. If max is less than 0, then the
  1679. -- string is assumed to be nul-terminated. If max is 0, p will not be
  1680. -- examined and may be NULL.
  1681. -- Returns : the length of the string in characters
  1682. -- ---------------------------------------------------------------------------------
  1683. -- g_utf8_strncpy ()
  1684. -- gchar* g_utf8_strncpy (gchar *dest,
  1685. -- const gchar *src,
  1686. -- gsize n);
  1687. -- Like the standard C strncpy() function, but copies a given number of characters
  1688. -- instead of a given number of bytes. The src string must be valid UTF-8 encoded
  1689. -- text. (Use g_utf8_validate() on all text before trying to use UTF-8 utility
  1690. -- functions with it.)
  1691. -- dest : buffer to fill with characters from src
  1692. -- src : UTF-8 encoded string
  1693. -- n : character count
  1694. -- Returns : dest
  1695. -- ---------------------------------------------------------------------------------
  1696. -- g_utf8_strchr ()
  1697. -- gchar* g_utf8_strchr (const gchar *p,
  1698. -- gssize len,
  1699. -- gunichar c);
  1700. -- Finds the leftmost occurrence of the given Unicode character in a UTF-8 encoded
  1701. -- string, while limiting the search to len bytes. If len is -1, allow unbounded
  1702. -- search.
  1703. -- p : a nul-terminated UTF-8 encoded string
  1704. -- len : the maximum length of p
  1705. -- c : a Unicode character
  1706. -- Returns : NULL if the string does not contain the character, otherwise, a pointer
  1707. -- to the start of the leftmost occurrence of the character in the string.
  1708. -- ---------------------------------------------------------------------------------
  1709. -- g_utf8_strrchr ()
  1710. -- gchar* g_utf8_strrchr (const gchar *p,
  1711. -- gssize len,
  1712. -- gunichar c);
  1713. -- Find the rightmost occurrence of the given Unicode character in a UTF-8 encoded
  1714. -- string, while limiting the search to len bytes. If len is -1, allow unbounded
  1715. -- search.
  1716. -- p : a nul-terminated UTF-8 encoded string
  1717. -- len : the maximum length of p
  1718. -- c : a Unicode character
  1719. -- Returns : NULL if the string does not contain the character, otherwise, a pointer
  1720. -- to the start of the rightmost occurrence of the character in the
  1721. -- string.
  1722. -- ---------------------------------------------------------------------------------
  1723. -- g_utf8_strreverse ()
  1724. -- gchar* g_utf8_strreverse (const gchar *str,
  1725. -- gssize len);
  1726. -- Reverses a UTF-8 string. str must be valid UTF-8 encoded text. (Use
  1727. -- g_utf8_validate() on all text before trying to use UTF-8 utility functions with
  1728. -- it.)
  1729. -- Note that unlike g_strreverse(), this function returns newly-allocated memory,
  1730. -- which should be freed with g_free() when no longer needed.
  1731. -- str : a UTF-8 encoded string
  1732. -- len : the maximum length of str to use. If len < 0, then the string is
  1733. -- nul-terminated.
  1734. -- Returns : a newly-allocated string which is the reverse of str.
  1735. -- Since 2.2
  1736. -- ---------------------------------------------------------------------------------
  1737. -- g_utf8_validate ()
  1738. -- gboolean g_utf8_validate (const gchar *str,
  1739. -- gssize max_len,
  1740. -- const gchar **end);
  1741. -- Validates UTF-8 encoded text. str is the text to validate; if str is
  1742. -- nul-terminated, then max_len can be -1, otherwise max_len should be the number of
  1743. -- bytes to validate. If end is non-NULL, then the end of the valid range will be
  1744. -- stored there (i.e. the start of the first invalid character if some bytes were
  1745. -- invalid, or the end of the text being validated otherwise).
  1746. -- Note that g_utf8_validate() returns FALSE if max_len is positive and NUL is met
  1747. -- before max_len bytes have been read.
  1748. -- Returns TRUE if all of str was valid. Many GLib and GTK+ routines require valid
  1749. -- UTF-8 as input; so data read from a file or the network should be checked with
  1750. -- g_utf8_validate() before doing anything else with it.
  1751. -- str : a pointer to character data
  1752. -- max_len : max bytes to validate, or -1 to go until NUL
  1753. -- end : return location for end of valid data
  1754. -- Returns : TRUE if the text was valid UTF-8
  1755. -- ---------------------------------------------------------------------------------
  1756. -- g_utf8_strup ()
  1757. -- gchar* g_utf8_strup (const gchar *str,
  1758. -- gssize len);
  1759. -- Converts all Unicode characters in the string that have a case to uppercase. The
  1760. -- exact manner that this is done depends on the current locale, and may result in
  1761. -- the number of characters in the string increasing. (For instance, the German
  1762. -- ess-zet will be changed to SS.)
  1763. -- str : a UTF-8 encoded string
  1764. -- len : length of str, in bytes, or -1 if str is nul-terminated.
  1765. -- Returns : a newly allocated string, with all characters converted to uppercase.
  1766. -- ---------------------------------------------------------------------------------
  1767. -- g_utf8_strdown ()
  1768. -- gchar* g_utf8_strdown (const gchar *str,
  1769. -- gssize len);
  1770. -- Converts all Unicode characters in the string that have a case to lowercase. The
  1771. -- exact manner that this is done depends on the current locale, and may result in
  1772. -- the number of characters in the string changing.
  1773. -- str : a UTF-8 encoded string
  1774. -- len : length of str, in bytes, or -1 if str is nul-terminated.
  1775. -- Returns : a newly allocated string, with all characters converted to lowercase.
  1776. -- ---------------------------------------------------------------------------------
  1777. -- g_utf8_casefold ()
  1778. -- gchar* g_utf8_casefold (const gchar *str,
  1779. -- gssize len);
  1780. -- Converts a string into a form that is independent of case. The result will not
  1781. -- correspond to any particular case, but can be compared for equality or ordered
  1782. -- with the results of calling g_utf8_casefold() on other strings.
  1783. -- Note that calling g_utf8_casefold() followed by g_utf8_collate() is only an
  1784. -- approximation to the correct linguistic case insensitive ordering, though it is a
  1785. -- fairly good one. Getting this exactly right would require a more sophisticated
  1786. -- collation function that takes case sensitivity into account. GLib does not
  1787. -- currently provide such a function.
  1788. -- str : a UTF-8 encoded string
  1789. -- len : length of str, in bytes, or -1 if str is nul-terminated.
  1790. -- Returns : a newly allocated string, that is a case independent form of str.
  1791. -- ---------------------------------------------------------------------------------
  normalize (a_mode: G_NORMALIZE_MODE): UTF8_STRING is
      -- A new string holding Current converted into canonical form,
      -- i.e. with a standardized choice between representing an
      -- accented character as a base character followed by a combining
      -- accent or as a single precomposed character. It is generally
      -- advisable to call `normalize' before comparing two Unicode
      -- strings.
      --
      -- `a_mode' selects the kind of normalization:
      --
      -- * "default" only standardizes differences that do not affect
      --   the text content, such as the accent representation
      --   mentioned above.
      --
      -- * "all" additionally standardizes the Unicode "compatibility"
      --   characters, for example SUPERSCRIPT THREE becomes DIGIT
      --   THREE. Formatting information may be lost, but for most
      --   text operations such characters should be considered the
      --   same; `collate', for instance, normalizes with "all" as its
      --   first step.
      --
      -- * "compose" and "all_compose" behave like "default" and "all"
      --   respectively, but return composed forms instead of a
      --   maximally decomposed form. This is often useful when the
      --   string is going to be converted to a legacy encoding or
      --   handed to a system with less capable Unicode handling.
      --
      -- Wraps g_utf8_normalize (str, len, mode), whose C arguments are
      -- documented as:
      --   str  : a UTF-8 encoded string (here `handle').
      --   len  : length of str, in bytes, or -1 if str is
      --          nul-terminated (here `bytes_count').
      --   mode : the type of normalization to perform (here
      --          `a_mode.value').
      -- and which returns a newly allocated string that is the
      -- normalized form of str.
      -- NOTE(review): whether `from_pointer' takes ownership of that
      -- newly allocated C string is not visible from here -- confirm.
    local
      normalized: POINTER
    do
      normalized := g_utf8_normalize (handle, bytes_count, a_mode.value)
      create Result.from_pointer (normalized)
    ensure
      not_void: Result /= Void
    end
  1820. -- ---------------------------------------------------------------------------------
  1821. -- g_utf8_collate_key ()
  1822. -- gchar* g_utf8_collate_key (const gchar *str,
  1823. -- gssize len);
  1824. -- Converts a string into a collation key that can be compared with other collation
  1825. -- keys produced by the same function using strcmp(). The results of comparing the
  1826. -- collation keys of two strings with strcmp() will always be the same as comparing
  1827. -- the two original keys with g_utf8_collate().
  1828. -- str : a UTF-8 encoded string.
  1829. -- len : length of str, in bytes, or -1 if str is nul-terminated.
  1830. -- Returns : a newly allocated string. This string should be freed with g_free()
  1831. -- when you are done with it.
  1832. -- ---------------------------------------------------------------------------------
  1833. -- g_utf8_collate_key_for_filename ()
  1834. -- gchar* g_utf8_collate_key_for_filename (const gchar *str,
  1835. -- gssize len);
  1836. -- Converts a string into a collation key that can be compared with other collation
  1837. -- keys produced by the same function using strcmp().
  1838. -- In order to sort filenames correctly, this function treats the dot '.' as a
  1839. -- special case. Most dictionary orderings seem to consider it insignificant, thus
  1840. -- producing the ordering "event.c" "eventgenerator.c" "event.h" instead of
  1841. -- "event.c" "event.h" "eventgenerator.c". Also, we would like to treat numbers
  1842. -- intelligently so that "file1" "file10" "file5" is sorted as "file1" "file5"
  1843. -- "file10".
  1844. -- str : a UTF-8 encoded string.
  1845. -- len : length of str, in bytes, or -1 if str is nul-terminated.
  1846. -- Returns : a newly allocated string. This string should be freed with g_free()
  1847. -- when you are done with it.
  1848. -- Since 2.8
  1849. feature {} -- Implementation
  offset_to_pointer (an_offset: INTEGER_32): POINTER is
      -- A pointer to the position inside Current that lies
      -- `an_offset' characters from its beginning; i.e. converts an
      -- integer character offset into a pointer within the string.
      --
      -- Since GLib 2.10 a negative `an_offset' may be passed to step
      -- backwards. It is usually worth stepping backwards from the
      -- end, instead of forwards from the start, only when the offset
      -- falls in the last fourth of the string, since moving forward
      -- is about 3 times faster than moving backward.
    do
      Result := g_utf8_offset_to_pointer (handle, an_offset)
    end
  pointer_to_offset (a_pointer: POINTER): INTEGER_32 is
      -- The integer character offset corresponding to `a_pointer',
      -- a pointer to a position within the string; i.e. converts a
      -- pointer inside Current into a character offset.
      --
      -- Since GLib 2.10 `a_pointer' is allowed to lie before the
      -- start of the string; the result is negative in that case.
    do
      -- Pass the formal argument `a_pointer'; no `a_position' is
      -- declared in this feature.
      Result := g_utf8_pointer_to_offset (handle, a_pointer)
    end
  1869. feature {} -- Unwrapped code
  1870. -- g_utf8_to_utf16 ()
  1871. -- gunichar2* g_utf8_to_utf16 (const gchar *str,
  1872. -- glong len,
  1873. -- glong *items_read,
  1874. -- glong *items_written,
  1875. -- GError **error);
  1876. -- Convert a string from UTF-8 to UTF-16. A 0 character will be added to the result
  1877. -- after the converted text.
  1878. -- str : a UTF-8 encoded string
  1879. -- len : the maximum length (number of characters) of str to use. If len <
  1880. -- 0, then the string is nul-terminated.
  1881. -- items_read : location to store number of bytes read, or NULL. If NULL, then
  1882. -- G_CONVERT_ERROR_PARTIAL_INPUT will be returned in case str
  1883. -- contains a trailing partial character. If an error occurs then
  1884. -- the index of the invalid input is stored here.
  1885. -- items_written : location to store number of gunichar2 written, or NULL. The value
  1886. -- stored here does not include the trailing 0.
  1887. -- error : location to store the error occurring, or NULL to ignore errors.
  1888. -- Any of the errors in GConvertError other than
  1889. -- G_CONVERT_ERROR_NO_CONVERSION may occur.
  1890. -- Returns : a pointer to a newly allocated UTF-16 string. This value must be
  1891. -- freed with g_free(). If an error occurs, NULL will be returned
  1892. -- and error set.
  1893. -- ---------------------------------------------------------------------------------
  1894. -- g_utf8_to_ucs4 ()
  1895. -- gunichar* g_utf8_to_ucs4 (const gchar *str,
  1896. -- glong len,
  1897. -- glong *items_read,
  1898. -- glong *items_written,
  1899. -- GError **error);
  1900. -- Convert a string from UTF-8 to a 32-bit fixed width representation as UCS-4. A
  1901. -- trailing 0 will be added to the string after the converted text.
  1902. -- str : a UTF-8 encoded string
  1903. -- len : the maximum length of str to use. If len < 0, then the string is
  1904. -- nul-terminated.
  1905. -- items_read : location to store number of bytes read, or NULL. If NULL, then
  1906. -- G_CONVERT_ERROR_PARTIAL_INPUT will be returned in case str
  1907. -- contains a trailing partial character. If an error occurs then
  1908. -- the index of the invalid input is stored here.
  1909. -- items_written : location to store number of characters written, or NULL. The value
  1910. -- stored here does not include the trailing 0 character.
  1911. -- error : location to store the error occurring, or NULL to ignore errors.
  1912. -- Any of the errors in GConvertError other than
  1913. -- G_CONVERT_ERROR_NO_CONVERSION may occur.
  1914. -- Returns : a pointer to a newly allocated UCS-4 string. This value must be
  1915. -- freed with g_free(). If an error occurs, NULL will be returned
  1916. -- and error set.
  1917. -- ---------------------------------------------------------------------------------
  1918. -- g_utf8_to_ucs4_fast ()
  1919. -- gunichar* g_utf8_to_ucs4_fast (const gchar *str,
  1920. -- glong len,
  1921. -- glong *items_written);
  1922. -- Convert a string from UTF-8 to a 32-bit fixed width representation as UCS-4,
  1923. -- assuming valid UTF-8 input. This function is roughly twice as fast as
  1924. -- g_utf8_to_ucs4() but does no error checking on the input.
  1925. -- str : a UTF-8 encoded string
  1926. -- len : the maximum length of str to use. If len < 0, then the string is
  1927. -- nul-terminated.
  1928. -- items_written : location to store the number of characters in the result, or
  1929. -- NULL.
  1930. -- Returns : a pointer to a newly allocated UCS-4 string. This value must be
  1931. -- freed with g_free().
  1932. -- ---------------------------------------------------------------------------------
  1933. -- g_utf16_to_ucs4 ()
  1934. -- gunichar* g_utf16_to_ucs4 (const gunichar2 *str,
  1935. -- glong len,
  1936. -- glong *items_read,
  1937. -- glong *items_written,
  1938. -- GError **error);
  1939. -- Convert a string from UTF-16 to UCS-4. The result will be terminated with a 0
  1940. -- character.
  1941. -- str : a UTF-16 encoded string
  1942. -- len : the