PageRenderTime 58ms CodeModel.GetById 22ms RepoModel.GetById 1ms app.codeStats 0ms

/src/lib/string/unicode_string.e

http://github.com/tybor/Liberty
Specman e | 1920 lines | 1489 code | 108 blank | 323 comment | 88 complexity | ae54c695b3e9b74c411709b56358f4d8 MD5 | raw file
Possible License(s): GPL-3.0, LGPL-2.1, GPL-2.0
  1. -- This file is part of a Liberty Eiffel library.
  2. -- See the full copyright at the end.
  3. --
  4. class UNICODE_STRING
  5. --
  6. -- WARNING: THIS CLASS IS A WORK IN PROGRESS. SOME FEATURES ARE NOT
  7. -- YET IMPLEMENTED AND SOME FEATURES MAY APPEAR/DISAPPEAR.
  8. --
  9. -- A UNICODE_STRING is a resizable string written with unicode values.
  10. -- From unicode.org: "Unicode provides a unique number for every
  11. -- character,
  12. -- no matter what the platform,
  13. -- no matter what the program,
  14. -- no matter what the language."
  15. --
  16. -- WARNING: a grapheme may be described with several codes.
  17. -- A grapheme may be defined as a "user character". The Angstrom sign is
  18. -- one grapheme but may be defined using (LETTER A + COMBINING RING).
  19. -- Unicode strings may be accessed in two ways:
  20. -- - low-level (code by code)
  21. -- - high-level (grapheme by grapheme)
  22. --
  23. -- Unless otherwise specified, the unit used by all features is the unicode number.
  24. --
  25. inherit
  26. HASHABLE
  27. redefine copy, out_in_tagged_out_memory, fill_tagged_out_memory
  28. end
  29. COMPARABLE
  30. redefine is_equal, copy, compare, three_way_comparison, out_in_tagged_out_memory, fill_tagged_out_memory
  31. end
  32. TRAVERSABLE[INTEGER]
  33. redefine is_equal, copy, out_in_tagged_out_memory, fill_tagged_out_memory, next_generation
  34. end
  35. SEARCHABLE[INTEGER]
  36. redefine is_equal, copy, out_in_tagged_out_memory, fill_tagged_out_memory
  37. end
  38. RECYCLABLE
  39. redefine is_equal, copy, out_in_tagged_out_memory, fill_tagged_out_memory
  40. end
  41. insert
  42. UNICODE_STRING_HELPER
  43. redefine is_equal, copy, out_in_tagged_out_memory, fill_tagged_out_memory
  44. end
  45. create {ANY}
  46. make, copy, make_empty, make_filled, from_utf8
  47. feature {UNICODE_STRING, UNICODE_STRING_HANDLER}
  storage: NATIVE_ARRAY[INTEGER_16]
     -- Buffer holding the string, one 16-bit unit (2 bytes) per Unicode value.
     -- WARNING: this is only the raw `storage' area. The encoding is
     -- UTF-16 in native endianness, except that low surrogates are not
     -- kept here: they are stored apart so that positions can be
     -- accessed directly.
  53. feature {ANY}
  count: INTEGER
     -- Length of the string; it is also the largest valid index.
     --
     -- See also `is_empty', `lower', `upper'.
  capacity: INTEGER
     -- Number of 16-bit slots available in the `storage' area.
  lower: INTEGER 1
     -- Smallest valid index, which is always 1. This feature exists
     -- to mimic the one of the COLLECTION hierarchy.
     --
     -- See also `upper', `valid_index', `item'.
  upper: INTEGER
     -- Largest valid index, which is always equal to `count'. This
     -- feature exists to mimic the one of the COLLECTION hierarchy.
     --
     -- See also `lower', `valid_index', `item'.
     do
        Result := count
     ensure
        Result = count
     end
  75. feature {ANY} -- Creation / Modification:
  make (needed_capacity: INTEGER)
     -- Reset Current to the empty string, with a `storage' able to hold
     -- at least `needed_capacity' units. A `storage' that is already
     -- large enough is kept as it is.
     require
        non_negative_size: needed_capacity >= 0
     do
        if needed_capacity > 0 and then capacity < needed_capacity then
           storage := storage.calloc(needed_capacity)
           capacity := needed_capacity
        end
        count := 0
        -- The surrogate tables are created on first use, emptied otherwise.
        if low_surrogate_indexes /= Void then
           low_surrogate_indexes.make(0)
           low_surrogate_values.make(0)
        else
           create low_surrogate_indexes.make(0)
           create low_surrogate_values.make(0)
        end
        next_generation
     ensure
        needed_capacity <= capacity
        empty_string: count = 0
     end
  make_empty
     -- Initialize Current as an empty string; this is `make' called
     -- with a requested capacity of 0.
     do
        make(0)
     end
  make_filled (unicode: INTEGER; n: INTEGER)
     -- Initialize Current as `n' repetitions of the value `unicode'.
     require
        valid_count: n >= 0
        valid_unicode_value: valid_unicode(unicode)
     do
        -- Reserve room for `n' units, declare them all in use, then
        -- overwrite every position with `unicode'.
        make(n)
        count := n
        fill_with(unicode)
     ensure
        count_set: count = n
        filled: occurrences(unicode) = count
     end
  119. feature {ANY} -- Testing:
  is_empty: BOOLEAN
     -- True when the string holds no character at all, that is when
     -- `count' is 0.
     --
     -- See also `count'.
     do
        Result := count = 0
     end
  item (i: INTEGER): INTEGER
     -- Unicode value found at position `i'.
     --
     -- See also `lower', `upper', `valid_index', `put'.
     local
        unit: INTEGER
     do
        unit := storage.item(i - 1)
        if unit & 0x0000F800 /= 0x0000D800 then
           -- Ordinary unit: keep the 16 low bits only, because the
           -- INTEGER_16 read from `storage' may be negative.
           Result := unit & 0x0000FFFF
        else
           -- Surrogate marker found: rebuild the value from the high
           -- part kept in `storage' and the low part kept apart.
           Result := (unit & 0x000007FF + 64) * 1024 + low_surrogate_value(i)
        end
     end
  infix "@" (i: INTEGER): INTEGER
     -- Operator form of `item': unicode value at position `i'.
     --
     -- See also `item', `put'.
     require
        valid_index(i)
     do
        Result := item(i)
     ensure
        definition: Result = item(i)
     end
  hash_code: INTEGER
     -- Hash value computed from the `count' units kept in `storage'
     -- and from all the low surrogate values. The Result is never
     -- negative.
     local
        i: INTEGER
     do
        from
           i := 0
        until
           i >= count
        loop
           Result := Result #* 5 #+ storage.item(i)
           i := i + 1
        end
        -- Walk down to `lower' inclusive, so that the first low
        -- surrogate value also takes part in the hash.
        from
           i := low_surrogate_values.upper
        until
           i < low_surrogate_values.lower
        loop
           Result := Result #* 5 #+ low_surrogate_values.item(i)
           i := i - 1
        end
        if Result < 0 then
           -- Fold negative values back into the non-negative range.
           Result := -(Result + 1)
        end
     end
  infix "<" (other: like Current): BOOLEAN
     -- Does `Current' come strictly before `other' when both are
     -- compared unicode value by unicode value?
     --
     -- See also `>', `<=', `>=', `min', `max'.
     local
        pos, shortest: INTEGER
     do
        shortest := count.min(other.count)
        -- Skip the common prefix.
        from
           pos := 1
        until
           pos > shortest or else item(pos) /= other.item(pos)
        loop
           pos := pos + 1
        end
        if pos > shortest then
           -- One string is a prefix of the other: `Current' is smaller
           -- only when `other' still has characters left.
           Result := pos <= other.count
        else
           Result := item(pos) < other.item(pos)
        end
     end
  compare, three_way_comparison (other: like Current): INTEGER
     -- Three-way comparison built on `<': -1 when `Current' is smaller
     -- than `other', 1 when `other' is smaller than `Current', and 0
     -- when neither holds.
     do
        if Current < other then
           Result := -1
        elseif other < Current then
           Result := 1
        end
        -- Otherwise Result keeps its default value 0.
     end
  is_equal (other: like Current): BOOLEAN
     -- Are `Current' and `other' made of the same sequence of unicode values?
     --
     -- See also `same_as'.
     do
        if other = Current then
           Result := True
        elseif count = other.count and then low_surrogate_values.is_equal(other.low_surrogate_values) and then low_surrogate_indexes.is_equal(other.low_surrogate_indexes) then
           -- Same length and same surrogate tables: the units kept in
           -- `storage' decide.
           Result := storage.fast_memcmp(other.storage, count)
        end
     end
  same_as (other: UNICODE_STRING): BOOLEAN
     -- Case insensitive variant of `is_equal'.
     -- This feature has no implementation yet: the body only calls
     -- `not_yet_implemented'.
     require
        other /= Void
     do
        not_yet_implemented
     end
  index_of, fast_index_of (unicode: INTEGER; start_index: INTEGER): INTEGER
     -- Index of first occurrence of `unicode' at or after `start_index',
     -- 0 if none.
     --
     -- See also `reverse_index_of', `first_index_of', `last_index_of', `has'.
     require
        valid_unicode_value: valid_unicode(unicode)
     local
        code: INTEGER_16; remainder: INTEGER_16; i: INTEGER
     do
        if unicode >= 0x00010000 then
           -- Such a value is stored as a high and a low surrogate, so
           -- only the positions listed in `low_surrogate_indexes' can match.
           remainder := (unicode & 0x000003FF).to_integer_16
           from
              -- Skip the surrogates located before `start_index'.
              i := 0
           until
              i > low_surrogate_indexes.upper or else low_surrogate_indexes.item(i) >= start_index
           loop
              i := i + 1
           end
           from
              -- The low part is used as a cheap filter; the candidate is
              -- then confirmed with `item', which is the reference decoder
              -- used by the postcondition.
           until
              i > low_surrogate_indexes.upper or else low_surrogate_values.item(i) = remainder and then item(low_surrogate_indexes.item(i)) = unicode
           loop
              i := i + 1
           end
           if i <= low_surrogate_indexes.upper then
              Result := low_surrogate_indexes.item(i)
           end
        else
           -- Not a surrogate character: a plain scan of `storage' is enough.
           code := unicode.low_16
           from
              i := start_index - 1
              Result := start_index
           until
              Result > count or else storage.item(i) = code
           loop
              Result := Result + 1
              i := i + 1
           end
           if Result > count then
              Result := 0
           end
        end
     ensure then
        Result /= 0 implies item(Result) = unicode
     end
  reverse_index_of, fast_reverse_index_of (unicode: INTEGER; start_index: INTEGER): INTEGER
     -- Index of the nearest occurrence of `unicode' at or before
     -- `start_index', 0 if none. The string is scanned backwards, from
     -- `start_index' down to the first character.
     --
     -- See also `index_of', `last_index_of', `first_index_of'.
     --require
     -- valid_start_index: start_index >= 0 and start_index <= count
     -- valid_unicode_value: valid_unicode(unicode)
     do
        Result := start_index
        from
        until
           Result = 0 or else unicode = item(Result)
        loop
           Result := Result - 1
        end
     ensure then
        Result /= 0 implies item(Result) = unicode
     end
  first_index_of, fast_first_index_of (unicode: INTEGER): INTEGER
     -- Index of the first occurrence of `unicode' in the whole string,
     -- as given by `index_of' when the search starts at index 1.
     --
     -- See also `last_index_of', `index_of', `reverse_index_of'.
     do
        Result := index_of(unicode, 1)
     end
  last_index_of, fast_last_index_of (unicode: INTEGER): INTEGER
     -- Index of the last occurrence of `unicode', 0 if none. The
     -- search goes backwards from `upper'.
     --
     -- See also `first_index_of', `reverse_index_of', `index_of'.
     do
        Result := reverse_index_of(unicode, upper)
     end
  has, fast_has (unicode: INTEGER): BOOLEAN
     -- Does `unicode' appear at least once in the string?
     --
     -- See also `index_of', `occurrences', `has_substring'.
     require
        valid_unicode_value: valid_unicode(unicode)
     do
        Result := index_of(unicode, 1) /= 0
     end
  has_substring (other: UNICODE_STRING): BOOLEAN
     -- Does `other' appear somewhere inside `Current'?
     --
     -- See also `substring_index', `has'.
     require
        other_not_void: other /= Void
     do
        Result := substring_index(other, 1) /= 0
     end
  occurrences (unicode: INTEGER): INTEGER
     -- Number of times character `unicode' appears in the string.
     --
     -- See also `remove_all_occurrences', `has'.
     require
        valid_unicode_value: valid_unicode(unicode)
     local
        i: INTEGER
     do
        from
           i := index_of(unicode, 1)
        until
           i = 0
        loop
           Result := Result + 1
           -- Resume just after the occurrence that was found; searching
           -- again from `i' itself would find the same position forever.
           i := index_of(unicode, i + 1)
        end
     ensure
        Result >= 0
     end
  has_suffix (s: UNICODE_STRING): BOOLEAN
     -- Does `Current' end with `s'?
     --
     -- See also `remove_suffix', `has_prefix', `has_substring'.
     require
        s /= Void
     local
        k, shift: INTEGER
     do
        -- `shift' maps an index of `s' to the matching index of `Current';
        -- a negative value means that `s' is longer than `Current'.
        shift := count - s.count
        Result := shift >= 0
        from
           k := lower
        until
           not Result or else k > s.upper
        loop
           Result := item(k + shift) = s.item(k)
           k := k + 1
        end
     end
  has_prefix (p: UNICODE_STRING): BOOLEAN
     -- Does `Current' start with `p'?
     require
        p /= Void
     local
        k: INTEGER
     do
        -- A prefix can never be longer than the string itself.
        Result := count >= p.count
        from
           k := lower
        until
           not Result or else k > p.upper
        loop
           Result := item(k) = p.item(k)
           k := k + 1
        end
     end
  386. feature {ANY} -- Testing and Conversion:
  is_ascii: BOOLEAN
     -- True when every unit kept in `storage' is in range 0..127.
     local
        k: INTEGER
     do
        -- Scan backwards and stop on the first unit having a bit set
        -- above the 7 low ones.
        from
           k := count - 1
        until
           k < 0 or else storage.item(k) & 0xFF80 /= 0
        loop
           k := k - 1
        end
        Result := k < 0
     end
  to_utf8: STRING
     -- A new STRING holding the UTF-8 encoding of the current
     -- unicode string.
     --
     -- See also: `utf8_encode_in' and `as_utf8' to save memory.
     do
        -- Encode into the shared `tmp_buffer', then hand a copy of it
        -- to the caller.
        tmp_buffer.clear_count
        utf8_encode_in(tmp_buffer)
        Result := tmp_buffer.twin
     end
  as_utf8: STRING
     -- UTF-8 encoding of the string. The Result is always the same
     -- once object, whose content is overwritten by each call.
     --
     -- See also: `to_utf8', `utf8_encode_in'.
     do
        Result := once ""
        Result.clear_count
        utf8_encode_in(Result)
     end
  utf8_encode_in (s: STRING)
     -- Append to `s' the UTF-8 encoding of every unicode value of
     -- Current, from the first one to the last one.
     --
     -- See also: `to_utf8', `as_utf8'.
     require
        s /= Void
     local
        pos: INTEGER
     do
        from
           pos := 1
        until
           pos > count
        loop
           utf8_character_in(item(pos), s)
           pos := pos + 1
        end
     end
  utf16be_encode_in (s: STRING)
     -- Append to `s' the UTF-16BE encoding of Current.
     require
        s /= Void
     local
        pos, sur: INTEGER; unit: INTEGER_16
     do
        from
        until
           pos >= count
        loop
           unit := storage.item(pos)
           -- Big endian: most significant byte first.
           s.extend((unit |>>> 8).to_character)
           s.extend((unit & 0x00FF).to_character)
           if unit & 0xF800 = 0xD800 then
              -- High surrogate: its low surrogate comes from the side
              -- tables, which are walked in step with `storage'.
              check
                 low_surrogate_indexes.item(sur) = pos + 1
              end
              -- 220 is 0xDC, the high byte of the low surrogate range.
              s.extend((low_surrogate_values.item(sur) #// 256 + 220).to_character)
              s.extend((low_surrogate_values.item(sur) & 0x00FF).to_character)
              sur := sur + 1
           end
           pos := pos + 1
        end
     end
  utf8_decode_from (s: ABSTRACT_STRING): BOOLEAN
     -- Decode `s' as UTF-8 and append each decoded value to Current.
     -- The Result is `False' when at least one sequence could not be
     -- decoded; the value 65533 is appended in place of such a sequence.
     require
        s /= Void
     local
        pos, pending, length: INTEGER; code: INTEGER
     do
        Result := True
        from
           pos := 1
        until
           pos > s.count
        loop
           code := s.item(pos).code
           pos := pos + 1
           -- The first byte gives the sequence length and the first
           -- payload bits.
           inspect
              code
           when 0 .. 127 then
              extend(code)
              pending := 0
           when 192 .. 223 then
              code := code - 192
              pending := 2
           when 224 .. 239 then
              code := code - 224
              pending := 3
           when 240 .. 247 then
              code := code - 240
              pending := 4
           else
              -- Not a legal first byte.
              extend(65533)
              Result := False
              pending := 0
           end
           -- Read the continuation bytes (128..191), 6 payload bits each.
           -- `pending' ends at 1 on success and at 0 on failure.
           from
              length := pending
           until
              pending <= 1
           loop
              if pos <= s.count and then s.item(pos).code.in_range(128, 191) then
                 code := code * 64 + s.item(pos).code - 128
                 pos := pos + 1
                 pending := pending - 1
              else
                 extend(65533)
                 Result := False
                 pending := 0
              end
           end
           if pending = 1 then
              -- An overlong sequence must be refused by any UTF-8
              -- compliant decoder for security reasons; so must a
              -- value which is not a valid unicode.
              if code < 128 or else code < 2048 and then length > 2 or else code < 65536 and then length > 3 or else not valid_unicode(code) then
                 extend(65533)
                 Result := False
              else
                 extend(code)
              end
           end
        end
     end
  528. feature {}
  from_utf8 (s: ABSTRACT_STRING)
     -- Initialize Current from `s', which is read as UTF-8.
     -- This feature may be used for manifest strings. The outcome of
     -- the decoding is ignored here: use `utf8_decode_from' for error
     -- detection.
     require
        s /= Void
     local
        decoding_outcome: BOOLEAN
     do
        make(s.count)
        decoding_outcome := utf8_decode_from(s)
     end
  541. feature {ANY} -- Modification:
  resize (new_count: INTEGER)
     -- Resize Current. When `new_count' is greater than
     -- `count', new positions are initialized with unicode 0.
     require
        new_count >= 0
     local
        i: INTEGER
     do
        if new_count > count then
           if capacity < new_count then
              if capacity = 0 then
                 storage := storage.calloc(new_count)
              else
                 -- Slots from `count' to `capacity - 1' may still hold
                 -- units of a previous, longer content: reset them too.
                 -- NOTE(review): assumes `realloc' only initializes the
                 -- slots added beyond the old capacity - TODO confirm.
                 if count < capacity then
                    storage.clear(count, capacity - 1)
                 end
                 storage := storage.realloc(capacity, new_count)
              end
              capacity := new_count
           else
              storage.clear(count, new_count - 1)
           end
        end
        count := new_count
        -- When shrinking, forget the surrogates located after the new end.
        from
           i := low_surrogate_indexes.upper
        until
           i < 0 or else low_surrogate_indexes.item(i) <= new_count
        loop
           --TODO: only one remove out of the loop
           low_surrogate_indexes.remove_last
           low_surrogate_values.remove_last
           i := i - 1
        end
        next_generation
     ensure
        count = new_count
        capacity >= old capacity
     end
  clear_count, wipe_out
     -- Forget all the characters, so that `is_empty' holds after the call.
     -- `capacity' is left unchanged: the `storage' memory is neither
     -- released nor shrunk.
     --
     -- See also `clear_count_and_capacity'.
     do
        low_surrogate_indexes.make(0)
        low_surrogate_values.make(0)
        count := 0
        next_generation
     ensure
        is_empty: count = 0
        capacity = old capacity
     end
  clear_count_and_capacity
     -- Discard all characters (`is_empty' is True after that call) and
     -- give up the `storage' area, so that `capacity' is 0 afterwards.
     --
     -- See also `clear_count'.
     local
        null_storage: like storage
     do
        low_surrogate_indexes.clear_count_and_capacity
        low_surrogate_values.clear_count_and_capacity
        clear_count
        -- Establish the `capacity = 0' postcondition. `null_storage' is
        -- never assigned, so `storage' gets the default value of its type.
        capacity := 0
        storage := null_storage
        next_generation
     ensure
        is_empty: count = 0
        capacity = 0
     end
  copy (other: like Current)
     -- Make Current a copy of `other'.
     --
     -- See also `copy_substring'.
     do
        -- `copy' is also a creation procedure: the surrogate tables
        -- may not exist yet.
        if low_surrogate_indexes = Void then
           create low_surrogate_indexes.make(0)
           create low_surrogate_values.make(0)
        end
        count := other.count
        if count > 0 then
           if capacity < count then
              storage := storage.calloc(count)
              capacity := count
           end
           storage.copy_from(other.storage, count - 1)
        end
        low_surrogate_indexes.copy(other.low_surrogate_indexes)
        low_surrogate_values.copy(other.low_surrogate_values)
        next_generation
     ensure then
        count = other.count
     end
  copy_substring (s: like Current; start_index, end_index: INTEGER)
     -- Replace the content of Current with the characters of `s'
     -- located from `start_index' to `end_index'.
     --
     -- See also `copy'.
     --|*** DUMB IMPLEMENTATION
     require
        string_not_void: s /= Void
        valid_start_index: 1 <= start_index
        valid_end_index: end_index <= s.count
        meaningful_interval: start_index <= end_index + 1
     do
        -- Empty Current first, then append the requested slice.
        clear_count
        append_substring(s, start_index, end_index)
     end
  fill_with (unicode: INTEGER)
     -- Replace every unicode with the new value.
     require
        valid_unicode_value: valid_unicode(unicode)
     local
        i: INTEGER; code: INTEGER_16; remainder: INTEGER_16
     do
        if unicode >= 65536 then
           -- Stored as high and low surrogate. The high part carries the
           -- 0xD800 marker that `item' tests to recognize a surrogate.
           code := (unicode #// 1024 - 64 + 0x0000D800).low_16
           remainder := (unicode & 0x000003FF).to_integer_16 --unicode #\\ 1024
           storage.set_all_with(code, count - 1)
           low_surrogate_values.resize(count)
           low_surrogate_values.set_all_with(remainder)
           from
              -- Every position is now a surrogate: slot `i' of the table
              -- describes the (1-based) position `i + 1'.
              i := count - 1
              low_surrogate_indexes.resize(count)
           until
              i < 0
           loop
              low_surrogate_indexes.put(i + 1, i)
              i := i - 1
           end
        else
           code := unicode.low_16
           storage.set_all_with(code, count - 1)
           -- No surrogate is left in the string.
           low_surrogate_values.resize(0)
           low_surrogate_indexes.resize(0)
        end
        next_generation
     ensure
        occurrences(unicode) = count
     end
  replace_all (old_code, new_code: like item)
     -- Put `new_code' at every position which currently holds `old_code'.
     require
        valid_unicode_value: valid_unicode(old_code)
        valid_unicode_value: valid_unicode(new_code)
     local
        pos: INTEGER
     do
        --*** May be implemented in a more efficient way...
        if old_code /= new_code then
           -- Jump from one occurrence to the next one, resuming just
           -- after each replaced position.
           from
              pos := index_of(old_code, 1)
           until
              pos = 0
           loop
              put(new_code, pos)
              pos := index_of(old_code, pos + 1)
           end
        end
        next_generation
     ensure
        count = old count
        old_code /= new_code implies occurrences(old_code) = 0
     end
  append, append_string (s: UNICODE_STRING)
     -- Append a copy of 's' to `Current'.
     --
     -- See also `add_last', `add_first', `prepend', '+'.
     require
        s_not_void: s /= Void
     local
        s_count, needed_capacity, new_capacity, i, last: INTEGER; indexes: FAST_ARRAY[INTEGER]
     do
        s_count := s.count
        needed_capacity := count + s_count
        if needed_capacity > capacity then
           if capacity = 0 then
              storage := storage.calloc(needed_capacity)
              capacity := needed_capacity
           else
              -- Grow at least geometrically.
              new_capacity := (2 * capacity).max(needed_capacity)
              storage := storage.realloc(capacity, new_capacity)
              capacity := new_capacity
           end
        end
        storage.copy_at(count, s.storage, s_count)
        from
           indexes := s.low_surrogate_indexes
           -- The bound is read once: when `s' is `Current', `indexes' is
           -- the very table which grows inside the loop.
           last := indexes.upper
        until
           i > last
        loop
           -- Positions of `s' are shifted by the length `Current' had so far.
           low_surrogate_indexes.add_last(indexes.item(i) + count)
           low_surrogate_values.add_last(s.low_surrogate_values.item(i))
           i := i + 1
        end
        count := needed_capacity
        next_generation
     end
  append_substring (s: like Current; start_index, end_index: INTEGER)
     -- Add at the end of Current the characters of `s' located from
     -- `start_index' to `end_index', one `extend' per character.
     --|*** DUMB IMPLEMENTATION
     require
        string_not_void: s /= Void
        valid_start_index: 1 <= start_index
        valid_end_index: end_index <= s.count
        meaningful_interval: start_index <= end_index + 1
     local
        pos: INTEGER
     do
        from
           pos := start_index
        until
           pos > end_index
        loop
           extend(s.item(pos))
           pos := pos + 1
        end
     end
  prepend (other: UNICODE_STRING)
     -- Insert the content of `other' in front of `Current'.
     --
     -- See also `append'.
     require
        other /= Void
     local
        old_count, added, n: INTEGER
     do
        old_count := count
        added := other.count
        resize(old_count + added)
        if old_count > 0 and then added > 0 then
           -- Make room: slide the existing units and the recorded
           -- surrogate positions `added' places to the right.
           storage.move(0, old_count - 1, added)
           from
              n := low_surrogate_indexes.upper
           until
              n < 0
           loop
              low_surrogate_indexes.put(low_surrogate_indexes.item(n) + added, n)
              n := n - 1
           end
        end
        -- May be implemented in a more efficient way...
        -- The surrogates of `other' are added last to first, so that
        -- they end up in their original order at the head of the tables.
        from
           n := other.low_surrogate_indexes.upper
        until
           n < 0
        loop
           low_surrogate_indexes.add_first(other.low_surrogate_indexes.item(n))
           low_surrogate_values.add_first(other.low_surrogate_values.item(n))
           n := n - 1
        end
        storage.copy_from(other.storage, added - 1)
        next_generation
     ensure
        (old other.twin + old Current.twin).is_equal(Current)
     end
  insert_string (s: UNICODE_STRING; i: INTEGER)
     -- Insert `s' at index `i', shifting characters from index `i'
     -- to `count' rightwards.
     require
        string_not_void: s /= Void
        valid_insertion_index: 1 <= i and i <= count + 1
     local
        old_count, s_count, pos, n, old_sur, s_sur: INTEGER
     do
        old_count := count
        s_count := s.count
        resize(old_count + s_count)
        if i <= old_count then
           storage.move(i - 1, old_count - 1, s_count)
        end
        storage.copy_at(i - 1, s.storage, s_count)
        -- NOTE(review): `low_surrogate_position(i)' is assumed to be the
        -- slot of the first recorded surrogate whose position is `i' or
        -- more (one past the last slot if none) - TODO confirm.
        pos := low_surrogate_position(i)
        old_sur := low_surrogate_indexes.count
        s_sur := s.low_surrogate_indexes.count
        low_surrogate_indexes.resize(old_sur + s_sur)
        low_surrogate_values.resize(old_sur + s_sur)
        from
           -- Move every existing surrogate located at or after `i':
           -- `s_sur' slots further in the tables, `s_count' positions
           -- further in the string. Going backwards avoids overwriting
           -- entries which are still to be moved.
           n := old_sur - 1
        until
           n < pos
        loop
           low_surrogate_indexes.put(low_surrogate_indexes.item(n) + s_count, n + s_sur)
           low_surrogate_values.put(low_surrogate_values.item(n), n + s_sur)
           n := n - 1
        end
        from
           -- Copy the surrogates of `s' into the freed slots. Position
           -- `p' of `s' becomes position `p + i - 1' of `Current'.
           n := 0
        until
           n >= s_sur
        loop
           low_surrogate_indexes.put(s.low_surrogate_indexes.item(n) + i - 1, pos + n)
           low_surrogate_values.put(s.low_surrogate_values.item(n), pos + n)
           n := n + 1
        end
        next_generation
     end
  replace_substring (s: UNICODE_STRING; start_index, end_index: INTEGER)
     -- Put `s' in place of the characters located from `start_index'
     -- to `end_index', inclusive.
     require
        string_not_void: s /= Void
        valid_start_index: 1 <= start_index
        valid_end_index: end_index <= count
        meaningful_interval: start_index <= end_index + 1
     do
        -- May be implemented in a more efficient way...
        -- Done in two steps: drop the old slice, then insert `s' where
        -- the slice used to start.
        remove_between(start_index, end_index)
        insert_string(s, start_index)
     end
  infix "+" (other: UNICODE_STRING): like Current
     -- A new UNICODE_STRING made of the content of `Current'
     -- followed by the content of `other'.
     --
     -- See also `append'.
     require
        other_exists: other /= Void
     do
        -- The capacity needed by both parts is requested at creation.
        create Result.make(count + other.count)
        Result.append(Current)
        Result.append(other)
     ensure
        result_count: Result.count = count + other.count
     end
  put (unicode: INTEGER; i: INTEGER)
     -- Put `unicode' at position `i'.
     --
     -- See also `item', `lower', `upper', `swap'.
     require
        valid_index: valid_index(i)
        valid_unicode_value: valid_unicode(unicode)
     local
        v, n: INTEGER
     do
        if unicode >= 65536 then
           -- Stored as high and low surrogate. The high part carries the
           -- 0xD800 marker that `item' tests to recognize a surrogate.
           v := unicode #// 1024 - 64 + 0x0000D800
           if storage.item(i - 1) & 0xF800 = 0xD800 then
              -- Position `i' already has an entry: only its low part changes.
              low_surrogate_values.put((unicode & 0x000003FF).to_integer_16, low_surrogate_index(i))
           else
              -- Open a slot at rank `n' of the tables for position `i'.
              n := low_surrogate_position(i)
              low_surrogate_indexes.add_last(0)
              low_surrogate_values.add_last(0)
              if n /= low_surrogate_indexes.upper then
                 low_surrogate_indexes.move(n, low_surrogate_indexes.upper - 1, 1)
                 low_surrogate_values.move(n, low_surrogate_values.upper - 1, 1)
              end
              low_surrogate_indexes.put(i, n)
              low_surrogate_values.put((unicode & 0x000003FF).to_integer_16, n)
           end
           storage.put(v.low_16, i - 1)
        else
           if storage.item(i - 1) & 0xF800 = 0xD800 then
              -- The former value was a surrogate: drop its entry.
              v := low_surrogate_index(i)
              low_surrogate_indexes.remove(v)
              low_surrogate_values.remove(v)
           end
           storage.put(unicode.low_16, i - 1)
        end
        next_generation
     ensure
        item(i) = unicode
     end
  swap (i1, i2: INTEGER)
     -- Swap two characters.
     --
     -- See also `item', `put'.
     require
        valid_index(i1)
        valid_index(i2)
     local
        first, second: INTEGER
     do
        if i1 /= i2 then
           -- Both values are read before anything is written; `put' then
           -- keeps `storage' and the surrogate tables consistent, whatever
           -- the kind (surrogate or not) of each value.
           first := item(i1)
           second := item(i2)
           put(second, i1)
           put(first, i2)
        else
           -- Nothing to exchange.
           next_generation
        end
     ensure
        item(i1) = old item(i2)
        item(i2) = old item(i1)
     end
  insert_character (unicode: INTEGER; i: INTEGER)
     -- Inserts `unicode' at index `i', shifting characters from
     -- position 'i' to `count' rightwards.
     require
        valid_insertion_index: 1 <= i and i <= count + 1
        valid_unicode_value: valid_unicode(unicode)
     local
        j, k: INTEGER
     do
        -- Surrogates located at or after `i' slide one position to the right.
        k := low_surrogate_position(i)
        from
           j := low_surrogate_indexes.upper
        until
           j < k
        loop
           low_surrogate_indexes.put(low_surrogate_indexes.item(j) + 1, j)
           j := j - 1
        end
        resize(count + 1)
        if i < count then
           -- There is something after the insertion point to shift.
           -- When inserting at the very end the range would be empty.
           -- NOTE(review): `NATIVE_ARRAY.move' presumably rejects an
           -- empty range - TODO confirm.
           storage.move(i - 1, count - 2, 1)
        end
        -- Always reset the freed slot, so that `put' does not take a
        -- leftover unit for a surrogate having an entry in the tables.
        storage.put(0, i - 1)
        put(unicode, i)
     ensure
        item(i) = unicode
     end
  shrink (min_index, max_index: INTEGER)
     -- Reduce Current to the slice [`min_index' .. `max_index'];
     -- nothing is kept when that slice is empty.
     require
        1 <= min_index
        max_index <= count
        min_index <= max_index + 1
     local
        src, dst: INTEGER
     do
        if max_index < min_index then
           -- Empty slice.
           count := 0
           low_surrogate_indexes.make(0)
           low_surrogate_values.make(0)
        elseif min_index = 1 then
           -- The head is kept: only the tail has to be cut.
           count := max_index
           src := low_surrogate_position(count)
           if src <= low_surrogate_indexes.upper then
              if low_surrogate_indexes.item(src) = max_index then
                 -- The last kept character is itself a surrogate.
                 src := src + 1
              end
           end
           low_surrogate_indexes.resize(src)
           low_surrogate_values.resize(src)
        else
           storage.slice_copy(0, storage, min_index - 1, max_index - 1)
           -- Bring the surrogates of the slice to the front of the
           -- tables, with positions made relative to the new start.
           from
              src := low_surrogate_position(min_index)
           until
              src > low_surrogate_indexes.upper or else low_surrogate_indexes.item(src) > max_index
           loop
              low_surrogate_indexes.put(low_surrogate_indexes.item(src) - min_index + 1, dst)
              low_surrogate_values.put(low_surrogate_values.item(src), dst)
              dst := dst + 1
              src := src + 1
           end
           low_surrogate_indexes.resize(dst)
           low_surrogate_values.resize(dst)
           count := max_index - min_index + 1
        end
        next_generation
     ensure
        count = max_index - min_index + 1
     end
  remove (i: INTEGER)
     -- Delete the character located at position `i'; this is
     -- `remove_between' applied to the one-character range `i' .. `i'.
     --
     -- See also `remove_head', `remove_between', `remove_suffix', `remove_prefix'.
     require
        valid_removal_index: valid_index(i)
     do
        remove_between(i, i)
     ensure
        count = old count - 1
     end
  add_first, precede (unicode: INTEGER)
     -- Insert `unicode' in front of the string.
     --
     -- See also `add_last'.
     require
        valid_unicode_value: valid_unicode(unicode)
     local
        n: INTEGER
     do
        -- Every recorded surrogate slides one position to the right.
        from
           n := low_surrogate_indexes.upper
        until
           n < 0
        loop
           low_surrogate_indexes.put(low_surrogate_indexes.item(n) + 1, n)
           n := n - 1
        end
        resize(count + 1)
        if count > 1 then
           -- Shift the former content and reset the freed first slot
           -- before `put' inspects it.
           storage.move(0, count - 2, 1)
           storage.put(0, 0)
        end
        put(unicode, 1)
     ensure
        count = 1 + old count
        item(1) = unicode
     end
  add_last, append_character, extend (unicode: INTEGER)
        -- Append `unicode' at the end of Current, growing `storage'
        -- when it is full (32 slots for the first allocation, then
        -- doubling).
        --
        -- See also `add_first'.
     require
        valid_unicode_value: valid_unicode(unicode)
     local
        new_capacity: INTEGER
     do
        if capacity <= count then
           -- No free slot left: allocate or grow `storage'.
           if capacity = 0 then
              new_capacity := 32
              storage := storage.calloc(new_capacity)
           else
              new_capacity := 2 * capacity
              storage := storage.realloc(capacity, new_capacity)
           end
           capacity := new_capacity
        end
        if unicode >= 65536 then
           -- Stored as high and low surrogate. The new character will
           -- have user index `count' + 1 (indexes in
           -- `low_surrogate_indexes' start at 1, see `valid_surrogates').
           low_surrogate_indexes.add_last(count + 1)
           low_surrogate_values.add_last((unicode & 0x000003FF).to_integer_16)
           -- The high surrogate carries the 0xD800 marker: `first', `last'
           -- and `valid_surrogates' recognise a surrogate by testing
           -- `& 0xF800 = 0xD800' on the stored code unit.
           storage.put((unicode #// 1024 - 64 + 0x0000D800).low_16, count)
        else
           storage.put(unicode.low_16, count)
        end
        count := count + 1
        next_generation
     ensure
        count = 1 + old count
        item(count) = unicode
     end
  to_lower
        -- Convert, in place, all characters to lower case.
        -- Not available yet: the body only calls `not_yet_implemented'.
        --
        -- See also `to_upper', `as_lower', `as_upper'.
     do
        not_yet_implemented
     end
  to_upper
        -- Convert, in place, all characters to upper case.
        -- Not available yet: the body only calls `not_yet_implemented'.
        --
        -- See also `to_lower', `as_upper', `as_lower'.
     do
        not_yet_implemented
     end
  as_lower: like Current
        -- Fresh copy of Current on which `to_lower' has been applied;
        -- Current itself is not modified.
        --
        -- See also `as_upper', `to_lower', `to_upper'.
     do
        create Result.copy(Current)
        Result.to_lower
     end
  as_upper: like Current
        -- Fresh copy of Current on which `to_upper' has been applied;
        -- Current itself is not modified.
        --
        -- See also `as_lower', `to_upper', `to_lower'.
     do
        create Result.copy(Current)
        Result.to_upper
     end
  keep_head (n: INTEGER)
        -- Keep only the first `n' characters.
        -- Nothing happens when `n' >= `count'.
        --
        -- See also `keep_tail', `remove_head', `remove_tail'.
     require
        n_non_negative: n >= 0
     local
        excess: INTEGER
     do
        -- Number of trailing characters that have to go.
        excess := count - n
        if excess > 0 then
           remove_tail(excess)
        end
     ensure
        count = n.min(old count)
     end
  keep_tail (n: INTEGER)
        -- Keep only the last `n' characters.
        -- Nothing happens when `n' >= `count'.
        --
        -- See also `keep_head', `remove_tail', `remove_head'.
     require
        n_non_negative: n >= 0
     local
        excess: INTEGER
     do
        -- Number of leading characters that have to go.
        excess := count - n
        if excess > 0 then
           remove_head(excess)
        end
     ensure
        count = n.min(old count)
     end
  remove_first
        -- Delete the `first' character.
        --
        -- See also `remove_head', `remove_last', `remove'.
     require
        not is_empty
     do
        --*** May be improved?
        -- Delegates to the generic interval removal (`remove_substring'
        -- is a synonym of `remove_between').
        remove_substring(1, 1)
     ensure
        count = old count - 1
     end
  remove_head (n: INTEGER)
        -- Delete the first `n' characters; the whole content is deleted
        -- when `n' >= `count'.
        --
        -- See also `remove_tail', `remove', `remove_the_first'.
     require
        n_non_negative: n >= 0
     do
        if n > count then
           -- More than what is available: simply become empty.
           count := 0
           low_surrogate_indexes.make(0)
           low_surrogate_values.make(0)
        elseif n > 0 then
           -- Also covers `n' = `count'.
           remove_between(1, n)
        end
        next_generation
     ensure
        count = (old count - n).max(0)
     end
  remove_last
        -- Delete the `last' character.
        --
        -- See also `remove_tail', `remove_first', `remove'.
     require
        not is_empty
     do
        --*** May be improved
        -- Implemented as the removal of a one-character tail.
        remove_tail(1)
     ensure
        count = old count - 1
     end
  remove_tail (n: INTEGER)
        -- Delete the last `n' characters; the whole content is deleted
        -- when `n' >= `count'.
        --
        -- See also `remove_head', `remove', `remove_the_last'.
     require
        n_non_negative: n >= 0
     local
        kept_entries: INTEGER
     do
        if n > count then
           count := 0
           low_surrogate_indexes.make(0)
           low_surrogate_values.make(0)
        else
           count := count - n
           -- Surrogate entries whose index is beyond the new `count' are
           -- dropped; `storage' itself is left untouched.
           kept_entries := low_surrogate_position(count + 1)
           low_surrogate_indexes.resize(kept_entries)
           low_surrogate_values.resize(kept_entries)
        end
        next_generation
     ensure
        count = (old count - n).max(0)
     end
  remove_substring, remove_between (start_index, end_index: INTEGER)
        -- Delete the characters [`start_index' .. `end_index'], bounds
        -- included. Nothing is deleted when the interval is empty
        -- (`end_index' = `start_index' - 1).
     require
        valid_start_index: 1 <= start_index
        valid_end_index: end_index <= count
        meaningful_interval: start_index <= end_index + 1
     local
        dst, src, removed: INTEGER
     do
        removed := end_index - start_index + 1
        if removed > 0 then
           -- Surrogate entries located after the removed interval are
           -- moved over the entries of the removed interval, with their
           -- index shifted left by `removed'.
           from
              dst := low_surrogate_position(start_index)
              src := low_surrogate_position(end_index + 1)
           until
              src > low_surrogate_indexes.upper
           loop
              low_surrogate_indexes.put(low_surrogate_indexes.item(src) - removed, dst)
              low_surrogate_values.put(low_surrogate_values.item(src), dst)
              src := src + 1
              dst := dst + 1
           end
           low_surrogate_indexes.resize(dst)
           low_surrogate_values.resize(dst)
           -- Close the gap in `storage' (0-based positions).
           storage.slice_copy(start_index - 1, storage, end_index, count - 1)
           count := count - removed
        end
        next_generation
     ensure
        count = old count - (end_index - start_index + 1)
     end
  remove_suffix (s: UNICODE_STRING)
        -- Delete the suffix `s' from Current.
        -- Not available yet: the body only calls `not_yet_implemented'.
        --
        -- See also `remove_prefix', `remove_tail', `remove'.
     require
        has_suffix(s)
     do
        not_yet_implemented
        -- Removing `s.count' trailing items is not enough: equal
        -- sequences may have different sizes.
     ensure
        (old Current.twin).is_equal(Current + old s.twin)
     end
  remove_prefix (s: UNICODE_STRING)
        -- Delete the prefix `s' from Current.
        -- Not available yet: the body only calls `not_yet_implemented'.
        --
        -- See also `remove_suffix', `remove_head', `remove'.
     require
        has_prefix(s)
     do
        not_yet_implemented
        -- Removing `s.count' leading items is not enough: equal
        -- sequences may have different sizes.
     ensure
        (old Current.twin).is_equal(old s.twin + Current)
     end
  left_adjust
        -- Delete the leading blanks (as defined by `is_space').
        --
        -- See also `remove_head', `first'.
     local
        blanks: INTEGER
     do
        -- not_yet_implemented -- handle combining characters
        -- Count the leading blanks, then remove them in one go.
        from
           blanks := 0
        until
           blanks >= count or else not is_space(item(blanks + 1))
        loop
           blanks := blanks + 1
        end
        remove_head(blanks)
     ensure
        -- not_yet_implemented -- handle combining characters
        stripped: is_empty or else not is_space(first)
     end
  right_adjust
        -- Delete the trailing blanks (as defined by `is_space').
        --
        -- See also `remove_tail', `last'.
     local
        kept_entries: INTEGER
     do
        -- not_yet_implemented -- handle combining characters
        -- Shrink `count' directly while the last character is a blank.
        from
        until
           count = 0 or else not is_space(item(count))
        loop
           count := count - 1
        end
        -- Then drop the surrogate entries located beyond the new `count'.
        kept_entries := low_surrogate_position(count + 1)
        low_surrogate_indexes.resize(kept_entries)
        low_surrogate_values.resize(kept_entries)
        next_generation
     ensure
        -- not_yet_implemented -- handle combining characters
        stripped: is_empty or else not is_space(last)
     end
  1335. feature {ANY} -- Printing:
  out_in_tagged_out_memory
        -- Append the UTF-8 encoding of Current to `tagged_out_memory'.
     do
        utf8_encode_in(tagged_out_memory)
     end
  fill_tagged_out_memory
        -- Append to `tagged_out_memory' the `count', the `capacity' and
        -- the UTF-8 encoded content of Current, the latter enclosed in
        -- double quotes.
     local
        buffer: STRING
     do
        buffer := tagged_out_memory
        buffer.append(once "count: ")
        count.append_in(buffer)
        buffer.append(once "capacity: ")
        capacity.append_in(buffer)
        buffer.append(once "storage: %"")
        utf8_encode_in(buffer)
        buffer.append_character('%"')
     end
  1350. feature {ANY} -- Other features:
  first: INTEGER
        -- Unicode value of the very first character.
        --
        -- See also `last', `item'.
     local
        code_unit: INTEGER
     do
        code_unit := storage.item(0)
        if code_unit & 0x0000F800 = 0x0000D800 then
           -- High surrogate: its low part must be the first surrogate
           -- entry, recorded for user index 1.
           check
              low_surrogate_indexes.item(0) = 1
           end
           -- Rebuild the value from the high part and the low part.
           Result := code_unit & 0x000007FF + 64
           Result := Result * 1024 + low_surrogate_values.item(0)
        else
           -- Plain 16-bit value: mask away the sign extension.
           Result := code_unit & 0x0000FFFF
        end
     end
  last: INTEGER
        -- Unicode value of the very last character.
        --
        -- See also `first', `item'.
     local
        code_unit: INTEGER
     do
        code_unit := storage.item(count - 1)
        if code_unit & 0x0000F800 = 0x0000D800 then
           -- High surrogate: combine it with the low part recorded for
           -- user index `count'.
           Result := code_unit & 0x000007FF + 64
           Result := Result * 1024 + low_surrogate_value(count)
        else
           -- Plain 16-bit value: mask away the sign extension.
           Result := code_unit & 0x0000FFFF
        end
     end
  substring (start_index, end_index: INTEGER): like Current
        -- New string made of the characters [`start_index' .. `end_index']
        -- of Current; empty when `end_index' = `start_index' - 1.
        --
        -- See also `substring_index' and `copy_substring' to save memory.
     require
        valid_start_index: 1 <= start_index
        valid_end_index: end_index <= count
        meaningful_interval: start_index <= end_index + 1
     local
        i, length: INTEGER; c: like storage; lsi: FAST_ARRAY[INTEGER]; lsv: FAST_ARRAY[INTEGER_16]
     do
        length := end_index - start_index + 1
        create Result.make(length)
        Result.set_count(length)
        c := Result.storage
        lsi := Result.low_surrogate_indexes
        lsv := Result.low_surrogate_values
        -- Copy the 16-bit code units (0-based positions in `storage').
        c.slice_copy(0, storage, start_index - 1, end_index - 1)
        -- Copy the surrogate entries that fall inside the slice.
        from
           i := low_surrogate_position(start_index)
        until
           i > low_surrogate_indexes.upper or else low_surrogate_indexes.item(i) > end_index
        loop
           -- Surrogate indexes are user indexes starting at 1: the
           -- character at `start_index' becomes character 1 of `Result'
           -- (same re-basing as in `shrink').
           lsi.add_last(low_surrogate_indexes.item(i) - start_index + 1)
           lsv.add_last(low_surrogate_values.item(i))
           i := i + 1
        end
     ensure
        substring_count: Result.count = end_index - start_index + 1
     end
  extend_multiple (unicode: INTEGER; n: INTEGER)
        -- Append `unicode' to Current `n' times.
     require
        n >= 0
        valid_unicode_value: valid_unicode(unicode)
     local
        remaining: INTEGER
     do
        from
           remaining := n
        until
           remaining = 0
        loop
           add_last(unicode)
           remaining := remaining - 1
        end
     ensure
        count = n + old count
     end
  precede_multiple (unicode: INTEGER; n: INTEGER)
        -- Insert `unicode' in front of Current `n' times.
     require
        n >= 0
        valid_unicode_value: valid_unicode(unicode)
     local
        remaining: INTEGER
     do
        if n > 0 and then count = 0 then
           -- On an empty string, prepending and appending are the same.
           extend_multiple(unicode, n)
        elseif n > 0 then
           --|*** May be implemented in a more efficient way...
           from
              remaining := n
           until
              remaining = 0
           loop
              add_first(unicode)
              remaining := remaining - 1
           end
        end
     ensure
        count = n + old count
     end
  extend_to_count (unicode: INTEGER; needed_count: INTEGER)
        -- Append `unicode' to Current until `count' reaches
        -- `needed_count'. Nothing happens when `count' is already
        -- greater than or equal to `needed_count'.
     require
        needed_count >= 0
        valid_unicode_value: valid_unicode(unicode)
     local
        missing: INTEGER
     do
        missing := needed_count - count
        if missing > 0 then
           extend_multiple(unicode, missing)
        end
     ensure
        count >= needed_count
     end
  precede_to_count (unicode: INTEGER; needed_count: INTEGER)
        -- Insert `unicode' in front of Current until `count' reaches
        -- `needed_count'. Nothing happens when `count' is already
        -- greater than or equal to `needed_count'.
     require
        needed_count >= 0
        valid_unicode_value: valid_unicode(unicode)
     local
        missing: INTEGER
     do
        missing := needed_count - count
        if missing > 0 then
           precede_multiple(unicode, missing)
        end
     ensure
        count >= needed_count
     end
  reverse
        -- Reverse the string.
        -- Flagged as unfinished: `not_yet_implemented' is called first,
        -- because the reversal should work grapheme by grapheme while
        -- the loop below swaps individual unicode values.
     local
        left, right: INTEGER
     do
        not_yet_implemented
        --|*** reverse grapheme
        -- Swap the two ends while walking towards the middle.
        from
           left := 1
           right := count
        until
           left >= right
        loop
           swap(left, right)
           left := left + 1
           right := right - 1
        end
     end
  remove_all_occurrences (unicode: INTEGER)
        -- Delete every occurrence of `unicode'.
        --
        -- See also `occurrences', `remove'.
     require
        valid_unicode_value: valid_unicode(unicode)
     local
        position: INTEGER
     do
        --|*** May be implemented in a more efficient way...
        from
           position := index_of(unicode, 1)
        until
           position = 0
        loop
           remove(position)
           -- After the removal the next candidate sits at the same
           -- position, so the search resumes from there.
           position := index_of(unicode, position)
        end
     ensure
        count = old count - old occurrences(unicode)
     end
  substring_index (other: UNICODE_STRING; start_index: INTEGER): INTEGER
        -- Position of the first occurrence of `other' at or after
        -- `start_index', 0 if none.
        -- Not available yet: the body only calls `not_yet_implemented'.
        --
        -- See also `substring', `first_substring_index'.
     require
        other_not_void: other /= Void
        valid_start_index: start_index >= 1 and start_index <= count + 1
     do
        not_yet_implemented
     end
  first_substring_index (other: UNICODE_STRING): INTEGER
        -- Position of the first occurrence of `other' in Current,
        -- 0 if none. Same as `substring_index' started at index 1.
        --
        -- See also `substring_index'.
     require
        other_not_void: other /= Void
     do
        Result := substring_index(other, 1)
     ensure
        definition: Result = substring_index(other, 1)
     end
  1546. feature {ANY} -- Splitting a STRING:
  split: ARRAY[UNICODE_STRING]
        -- The words of Current, as delimited by `is_separator'.
        -- The result is either Void (no word found) or a non empty array.
        --
        -- See also `split_in'.
     do
        if count > 0 then
           -- Collect the words in the shared once buffer, then hand out
           -- a copy of it so that the buffer can be reused.
           split_buffer.clear_count
           split_in(split_buffer)
           if not split_buffer.is_empty then
              Result := split_buffer.twin
           end
        end
     ensure
        Result /= Void implies not Result.is_empty
     end
  split_in (words: COLLECTION[UNICODE_STRING])
        -- Same job as `split', but each word found is appended to `words'.
        --
        -- See also `split'.
     require
        words /= Void
     local
        in_word: BOOLEAN; i, code: INTEGER
     do
        -- `in_word' is False while waiting for the next word and True
        -- while the characters of a word are being accumulated in
        -- `string_buffer'.
        -- not_yet_implemented --|*** handle combining characters
        from
           i := 1
        until
           i > count
        loop
           code := item(i)
           if is_separator(code) then
              if in_word then
                 -- A separator closes the word in progress.
                 words.add_last(string_buffer.twin)
                 in_word := False
              end
           else
              if not in_word then
                 -- First character of a new word.
                 string_buffer.clear_count
                 in_word := True
              end
              string_buffer.append_character(code)
           end
           i := i + 1
        end
        if in_word then
           -- The string ended in the middle of a word.
           words.add_last(string_buffer.twin)
        end
     ensure
        words.count >= old words.count
     end
  1605. feature {ANY} -- Other features:
  extend_unless (unicode: INTEGER)
        -- Append `unicode' to Current, unless `unicode' is already the
        -- last character.
     require
        valid_unicode_value: valid_unicode(unicode)
     do
        if not (count > 0 and then item(count) = unicode) then
           add_last(unicode)
        end
     ensure
        last = unicode
        count >= old count
     end
  new_iterator: ITERATOR[INTEGER]
        -- New ITERATOR_ON_UNICODE_STRING created on Current.
     do
        create {ITERATOR_ON_UNICODE_STRING} Result.make(Current)
     end
  valid_unicode (unicode: INTEGER): BOOLEAN
        -- Is `unicode' accepted as a character value? The value must be
        -- in 0 .. 0x10FFFF, outside the range 0xD800 .. 0xDFFF, and
        -- different from 0xFFFE and 0xFFFF.
     do
        if not unicode.in_range(0, 0x0010FFFF) then
           -- Outside the accepted range: `Result' stays False.
        elseif unicode.in_range(0x0000D800, 0x0000DFFF) then
           -- surrogates: `Result' stays False.
        else
           -- 0xFFFE: reverse BOM; 0xFFFF: not valid unicode value.
           Result := unicode /= 0x0000FFFE and then unicode /= 0x0000FFFF
        end
     end
  is_space (unicode: INTEGER): BOOLEAN
        -- Is `unicode' a blank? Only the plain space character is
        -- recognised for now.
     do
        -- not_yet_implemented; should handle combining characters
        Result := (unicode = ' '.code)
     end
  is_separator (unicode: INTEGER): BOOLEAN
        -- Is `unicode' a word separator? Only the plain space character
        -- is recognised for now.
     do
        -- not_yet_implemented; should handle combining characters
        Result := (unicode = ' '.code)
     end
  is_combining (unicode: INTEGER): BOOLEAN
        -- Is `unicode' a combining character?
        -- Not available yet: the body only calls `not_yet_implemented'.
     do
        not_yet_implemented
        -- Ranges noted for a future implementation:
        -- 0x0300 -> 0x036f
        -- 0x20d0 -> 0x20ff
        -- 0xfe20 -> 0xfe2f
     end
  1647. feature {UNICODE_STRING, UNICODE_STRING_HANDLER}
  low_surrogate_indexes: FAST_ARRAY[INTEGER]
        -- User indexes (starting at 1) of the characters stored as a
        -- surrogate pair. Entry `k' goes with entry `k' of
        -- `low_surrogate_values'.
  low_surrogate_values: FAST_ARRAY[INTEGER_16]
        -- Low surrogate of each character listed in
        -- `low_surrogate_indexes', stored without the 0xDC00 part.
        -- Original note: endianness dependant !
  set_count (new_count: INTEGER)
        -- Force `count' to `new_count'. Neither `storage' nor the
        -- surrogate arrays are touched here.
     require
        new_count <= capacity
     do
        count := new_count
     end
  1659. feature {}
  string_buffer: UNICODE_STRING
        -- Private, temporary buffer; being a once function, the same
        -- object (created with `make(256)') is returned at each call.
     once
        create Result.make(256)
     end
  tmp_buffer: STRING
        -- Private, temporary STRING buffer; being a once function, the
        -- same object (created with `make(256)') is returned at each call.
     once
        create Result.make(256)
     end
  split_buffer: ARRAY[UNICODE_STRING]
        -- Private once buffer used by `split' to collect the words.
     once
        create Result.with_capacity(4, 1)
     end
  low_surrogate_value (index: INTEGER): INTEGER_16
        -- Low surrogate part (without its 0xDC00 marker) of the
        -- character at user index `index' (starting at 1).
     require
        -- `index' is a user index whereas `storage' is 0-based (see how
        -- `last' reads `storage.item(count - 1)' and then calls this
        -- function with `count'): the code unit to check is `index' - 1.
        storage.item(index - 1) & 0xF800 = 0xD800
     do
        Result := low_surrogate_values.item(low_surrogate_index(index))
     ensure
        Result.in_range(0, 1023)
     end
  low_surrogate_index (index: INTEGER): INTEGER
        -- Position, in `low_surrogate_indexes', of the entry recorded
        -- for the character at user index `index'.
     require
        low_surrogate_indexes.has(index)
     do
        --|*** Should use dichotomic search
        Result := low_surrogate_indexes.fast_first_index_of(index)
     ensure
        low_surrogate_values.valid_index(Result)
     end
  low_surrogate_position (index: INTEGER): INTEGER
        -- Position, in the low_surrogate* arrays, of the first entry
        -- whose recorded index is greater than or equal to `index';
        -- `low_surrogate_indexes.upper' + 1 when there is no such entry.
        -- The answer is meaningful even when the character at `index'
        -- is not stored as a surrogate pair.
     do
        -- Should use dichotomic search
        -- Linear scan starting from position 0 (default value of Result).
        from
        until
           Result > low_surrogate_indexes.upper or else low_surrogate_indexes.item(Result) >= index
        loop
           Result := Result + 1
        end
     ensure
        low_surrogate_indexes.is_empty implies Result = 0
        Result <= low_surrogate_indexes.upper + 1
        Result >= low_surrogate_indexes.lower
        Result > low_surrogate_indexes.lower implies low_surrogate_indexes.item(Result - 1) < index
        -- The entry found is the one at `Result' itself; reading
        -- `Result' + 1 would be out of bounds when `Result' = `upper'.
        Result <= low_surrogate_indexes.upper implies low_surrogate_indexes.item(Result) >= index
     end
  valid_surrogates: BOOLEAN
        -- Are `storage' and the surrogate arrays consistent? True when
        -- every surrogate code unit found in `storage' is a high
        -- surrogate, has its entry in `low_surrogate_indexes' at the
        -- expected rank with the expected user index, and no extra
        -- entry is recorded.
     local
        i, j: INTEGER
     do
        from
           Result := True
        until
           i >= count
        loop
           if storage.item(i) & 0xF800 = 0xD800 then
              if j > low_surrogate_indexes.upper then
                 -- More surrogates in `storage' than recorded entries:
                 -- do not read beyond the end of the array.
                 Result := False
              elseif low_surrogate_indexes.item(j) /= i + 1 then
                 -- Recorded indexes are user indexes (0-based position + 1).
                 Result := False
              end
              if (storage.item(i) & 0x0400) /= 0 then
                 -- Code unit in the low surrogate range (0xDC00 .. 0xDFFF):
                 -- only high surrogates (0xD800 .. 0xDBFF) may be kept in
                 -- `storage', as `first' and `last' decode them with
                 -- `& 0x000007FF + 64'.
                 Result := False
              end
              j := j + 1
           end
           i := i + 1
        end
        if low_surrogate_indexes.count /= j then
           Result := False
        end
     end
  1735. feature {}
  manifest_initialize (c: like capacity; s: like storage; ls_cap: INTEGER; lsv: NATIVE_ARRAY[INTEGER_16]
     lsi: NATIVE_ARRAY[INTEGER])
        -- This function is a compiler-hook automatically called when
        -- a manifest unicode string (i.e. U"foo") is used in the Eiffel
        -- source code.
        -- `c' code units are copied from `s'; `ls_cap' surrogate entries
        -- are copied from `lsi' (indexes) and `lsv' (values).
     local
        i: INTEGER
     do
        if c > 0 then
           storage := storage.calloc(c)
           storage.copy_from(s, c - 1)
        end
        capacity := c
        count := c
        from
           -- `make(ls_cap)' already gives both arrays `ls_cap' items, so
           -- they are filled with `put'; using `add_last' here would
           -- append after them and double the number of entries.
           create low_surrogate_indexes.make(ls_cap)
           create low_surrogate_values.make(ls_cap)
        until
           i >= ls_cap
        loop
           --|*** TODO: array copy may be improved using
           --|NATIVE_ARRAY.copy_from. Need to force new upper value
           --|in FAST_ARRAY.
           low_surrogate_indexes.put(lsi.item(i), i)
           low_surrogate_values.put(lsv.item(i), i)
           i := i + 1
        end
     end
  1764. feature {}
  debug_utf8: STRING
        -- UTF-8 image of Current, refreshed by `set_debug_utf8'.
  set_debug_utf8
        -- Refresh `debug_utf8' from the current content.
     do
        if debug_utf8 /= Void then
           -- Reuse the existing STRING object.
           debug_utf8.copy(as_utf8)
        else
           debug_utf8 := to_utf8
        end
     end
  next_generation
        -- Run the inherited `next_generation', then refresh `debug_utf8'
        -- when the "UNICODE_STRING" debug key is active.
     do
        Precursor
        debug("UNICODE_STRING")
           set_debug_utf8
        end
     end
  1781. feature {RECYCLING_POOL}
  recycle
        -- Prepare Current for reuse by the recycling pool: calls
        -- `clear_count'.
     do
        clear_count
     end
  1786. invariant
  1787. 0 <= count
  1788. count <= capacity
  1789. capacity > 0 implies storage.is_not_null
  1790. low_surrogate_values.count = low_surrogate_indexes.count
  1791. valid_surrogates
  1792. end -- class UNICODE_STRING
  1793. --
  1794. -- Copyright (C) 2009-2017: by all the people cited in the AUTHORS file.
  1795. --
  1796. -- Permission is hereby granted, free of charge, to any person obtaining a copy
  1797. -- of this software and associated documentation files (the "Software"), to deal
  1798. -- in the Software without restriction, including without limitation the rights
  1799. -- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  1800. -- copies of the Software, and to permit persons to whom the Software is
  1801. -- furnished to do so, subject to the following conditions:
  1802. --
  1803. -- The above copyright notice and this permission notice shall be included in
  1804. -- all copies or substantial portions of the Software.
  1805. --
  1806. -- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  1807. -- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  1808. -- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  1809. -- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  1810. -- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  1811. -- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  1812. -- THE SOFTWARE.